RNP v2

In [80]:
from __future__ import print_function
import os.path
import dalmatian as dm
import pandas as pd
import sys
sys.path.insert(0, '../../')
#import Datanalytics as da 
from JKBio import TerraFunction as terra
%load_ext autoreload
%autoreload 2
from JKBio import Helper as h

import pickle
from taigapy import TaigaClient
tc = TaigaClient()
import numpy as np
import itertools

from bokeh.plotting import *
from bokeh.models import HoverTool
output_notebook()
import matplotlib.pyplot as plt
%load_ext rpy2.ipython
import seaborn as sns
import gseapy
import matplotlib.pyplot as plt
import networkx as nx
from JKBio.helper import pyDESeq2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import AgglomerativeClustering, DBSCAN

from sklearn.manifold import MDS, TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Loading BokehJS ...
The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython

getting data

In [7]:
! gsutil mv gs://transfer-amlproject/*MP7624* gs://transfer-amlproject/RNPv2/
Copying gs://transfer-amlproject/20200304_10_MP7624_S10_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_10_MP7624_S10_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_10_MP7624_S10_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_10_MP7624_S10_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_11_MP7624_S11_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_11_MP7624_S11_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_11_MP7624_S11_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_11_MP7624_S11_R2_001.fastq.gz...     

==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://transfer-amlproject/20200304_12_MP7624_S12_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_12_MP7624_S12_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_12_MP7624_S12_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_12_MP7624_S12_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_13_MP7624_S13_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_13_MP7624_S13_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_13_MP7624_S13_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_13_MP7624_S13_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_14_MP7624_S14_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_14_MP7624_S14_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_14_MP7624_S14_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_14_MP7624_S14_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_15_MP7624_S15_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_15_MP7624_S15_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_15_MP7624_S15_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_15_MP7624_S15_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_16_MP7624_S16_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_16_MP7624_S16_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_16_MP7624_S16_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_16_MP7624_S16_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_17_MP7624_S17_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_17_MP7624_S17_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_17_MP7624_S17_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_17_MP7624_S17_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_18_MP7624_S18_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_18_MP7624_S18_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_18_MP7624_S18_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_18_MP7624_S18_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_19_MP7624_S19_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_19_MP7624_S19_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_19_MP7624_S19_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_19_MP7624_S19_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_1_MP7624_S1_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_1_MP7624_S1_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_1_MP7624_S1_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_1_MP7624_S1_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_20_MP7624_S20_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_20_MP7624_S20_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_20_MP7624_S20_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_20_MP7624_S20_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_21_MP7624_S21_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_21_MP7624_S21_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_21_MP7624_S21_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_21_MP7624_S21_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_22_MP7624_S22_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_22_MP7624_S22_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_22_MP7624_S22_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_22_MP7624_S22_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_23_MP7624_S23_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_23_MP7624_S23_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_23_MP7624_S23_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_23_MP7624_S23_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_24_MP7624_S24_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_24_MP7624_S24_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_24_MP7624_S24_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_24_MP7624_S24_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_25_MP7624_S25_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_25_MP7624_S25_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_25_MP7624_S25_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_25_MP7624_S25_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_26_MP7624_S26_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_26_MP7624_S26_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_26_MP7624_S26_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_26_MP7624_S26_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_27_MP7624_S27_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_27_MP7624_S27_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_27_MP7624_S27_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_27_MP7624_S27_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_28_MP7624_S28_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_28_MP7624_S28_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_28_MP7624_S28_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_28_MP7624_S28_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_29_MP7624_S29_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_29_MP7624_S29_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_29_MP7624_S29_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_29_MP7624_S29_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_2_MP7624_S2_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_2_MP7624_S2_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_2_MP7624_S2_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_2_MP7624_S2_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_30_MP7624_S30_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_30_MP7624_S30_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_30_MP7624_S30_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_30_MP7624_S30_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_31_MP7624_S31_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_31_MP7624_S31_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_31_MP7624_S31_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_31_MP7624_S31_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_32_MP7624_S32_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_32_MP7624_S32_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_32_MP7624_S32_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_32_MP7624_S32_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_33_MP7624_S33_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_33_MP7624_S33_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_33_MP7624_S33_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_33_MP7624_S33_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_34_MP7624_S34_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_34_MP7624_S34_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_34_MP7624_S34_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_34_MP7624_S34_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_35_MP7624_S35_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_35_MP7624_S35_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_35_MP7624_S35_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_35_MP7624_S35_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_36_MP7624_S36_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_36_MP7624_S36_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_36_MP7624_S36_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_36_MP7624_S36_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_37_MP7624_S37_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_37_MP7624_S37_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_37_MP7624_S37_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_37_MP7624_S37_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_38_MP7624_S38_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_38_MP7624_S38_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_38_MP7624_S38_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_38_MP7624_S38_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_39_MP7624_S39_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_39_MP7624_S39_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_39_MP7624_S39_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_39_MP7624_S39_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_3_MP7624_S3_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_3_MP7624_S3_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_3_MP7624_S3_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_3_MP7624_S3_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_40_MP7624_S40_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_40_MP7624_S40_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_40_MP7624_S40_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_40_MP7624_S40_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_41_MP7624_S41_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_41_MP7624_S41_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_41_MP7624_S41_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_41_MP7624_S41_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_42_MP7624_S42_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_42_MP7624_S42_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_42_MP7624_S42_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_42_MP7624_S42_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_43_MP7624_S43_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_43_MP7624_S43_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_43_MP7624_S43_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_43_MP7624_S43_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_44_MP7624_S44_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_44_MP7624_S44_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_44_MP7624_S44_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_44_MP7624_S44_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_45_MP7624_S45_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_45_MP7624_S45_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_45_MP7624_S45_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_45_MP7624_S45_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_46_MP7624_S46_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_46_MP7624_S46_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_46_MP7624_S46_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_46_MP7624_S46_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_47_MP7624_S47_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_47_MP7624_S47_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_47_MP7624_S47_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_47_MP7624_S47_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_48_MP7624_S48_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_48_MP7624_S48_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_48_MP7624_S48_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_48_MP7624_S48_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_49_MP7624_S49_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_49_MP7624_S49_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_49_MP7624_S49_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_49_MP7624_S49_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_4_MP7624_S4_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_4_MP7624_S4_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_4_MP7624_S4_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_4_MP7624_S4_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_50_MP7624_S50_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_50_MP7624_S50_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_50_MP7624_S50_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_50_MP7624_S50_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_51_MP7624_S51_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_51_MP7624_S51_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_51_MP7624_S51_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_51_MP7624_S51_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_52_MP7624_S52_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_52_MP7624_S52_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_52_MP7624_S52_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_52_MP7624_S52_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_53_MP7624_S53_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_53_MP7624_S53_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_53_MP7624_S53_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_53_MP7624_S53_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_54_MP7624_S54_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_54_MP7624_S54_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_54_MP7624_S54_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_54_MP7624_S54_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_55_MP7624_S55_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_55_MP7624_S55_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_55_MP7624_S55_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_55_MP7624_S55_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_56_MP7624_S56_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_56_MP7624_S56_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_56_MP7624_S56_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_56_MP7624_S56_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_57_MP7624_S57_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_57_MP7624_S57_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_57_MP7624_S57_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_57_MP7624_S57_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_58_MP7624_S58_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_58_MP7624_S58_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_58_MP7624_S58_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_58_MP7624_S58_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_59_MP7624_S59_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_59_MP7624_S59_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_59_MP7624_S59_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_59_MP7624_S59_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_5_MP7624_S5_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_5_MP7624_S5_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_5_MP7624_S5_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_5_MP7624_S5_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_60_MP7624_S60_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_60_MP7624_S60_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_60_MP7624_S60_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_60_MP7624_S60_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_61_MP7624_S61_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_61_MP7624_S61_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_61_MP7624_S61_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_61_MP7624_S61_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_62_MP7624_S62_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_62_MP7624_S62_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_62_MP7624_S62_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_62_MP7624_S62_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_63_MP7624_S63_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_63_MP7624_S63_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_63_MP7624_S63_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_63_MP7624_S63_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_64_MP7624_S64_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_64_MP7624_S64_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_64_MP7624_S64_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_64_MP7624_S64_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_65_MP7624_S65_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_65_MP7624_S65_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_65_MP7624_S65_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_65_MP7624_S65_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_66_MP7624_S66_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_66_MP7624_S66_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_66_MP7624_S66_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_66_MP7624_S66_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_67_MP7624_S67_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_67_MP7624_S67_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_67_MP7624_S67_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_67_MP7624_S67_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_68_MP7624_S68_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_68_MP7624_S68_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_68_MP7624_S68_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_68_MP7624_S68_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_69_MP7624_S69_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_69_MP7624_S69_R1_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_69_MP7624_S69_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_69_MP7624_S69_R2_001.fastq.gz...     
Copying gs://transfer-amlproject/20200304_6_MP7624_S6_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_6_MP7624_S6_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_6_MP7624_S6_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_6_MP7624_S6_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_7_MP7624_S7_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_7_MP7624_S7_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_7_MP7624_S7_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_7_MP7624_S7_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_8_MP7624_S8_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_8_MP7624_S8_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_8_MP7624_S8_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_8_MP7624_S8_R2_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_9_MP7624_S9_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_9_MP7624_S9_R1_001.fastq.gz...       
Copying gs://transfer-amlproject/20200304_9_MP7624_S9_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Removing gs://transfer-amlproject/20200304_9_MP7624_S9_R2_001.fastq.gz...       

==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.


Operation completed over 138 objects/240.6 GiB.                                  
In [8]:
! gsutil -m cp -r gs://transfer-amlproject/RNPv3 gs://amlproject/RNA/
Copying gs://transfer-amlproject/RNPv2/20200304_10_MP7624_S10_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_10_MP7624_S10_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_11_MP7624_S11_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_11_MP7624_S11_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_12_MP7624_S12_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_12_MP7624_S12_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_13_MP7624_S13_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_14_MP7624_S14_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_13_MP7624_S13_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_14_MP7624_S14_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_15_MP7624_S15_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_15_MP7624_S15_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_16_MP7624_S16_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_16_MP7624_S16_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_17_MP7624_S17_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_17_MP7624_S17_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_18_MP7624_S18_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_18_MP7624_S18_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_19_MP7624_S19_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_19_MP7624_S19_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_1_MP7624_S1_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_1_MP7624_S1_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_20_MP7624_S20_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_20_MP7624_S20_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_21_MP7624_S21_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_21_MP7624_S21_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_22_MP7624_S22_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_22_MP7624_S22_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_23_MP7624_S23_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_23_MP7624_S23_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_24_MP7624_S24_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_24_MP7624_S24_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_25_MP7624_S25_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_25_MP7624_S25_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_26_MP7624_S26_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_26_MP7624_S26_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_27_MP7624_S27_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_27_MP7624_S27_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_28_MP7624_S28_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_29_MP7624_S29_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_28_MP7624_S28_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_2_MP7624_S2_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_29_MP7624_S29_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_2_MP7624_S2_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_30_MP7624_S30_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_30_MP7624_S30_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_31_MP7624_S31_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_32_MP7624_S32_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_31_MP7624_S31_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_33_MP7624_S33_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_32_MP7624_S32_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_33_MP7624_S33_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_34_MP7624_S34_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_34_MP7624_S34_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_35_MP7624_S35_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_48_MP7624_S48_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_35_MP7624_S35_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_36_MP7624_S36_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_36_MP7624_S36_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_37_MP7624_S37_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_38_MP7624_S38_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_37_MP7624_S37_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_3_MP7624_S3_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_38_MP7624_S38_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_39_MP7624_S39_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_39_MP7624_S39_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_40_MP7624_S40_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_3_MP7624_S3_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_40_MP7624_S40_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_42_MP7624_S42_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_41_MP7624_S41_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_47_MP7624_S47_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_44_MP7624_S44_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_41_MP7624_S41_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_42_MP7624_S42_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_43_MP7624_S43_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_45_MP7624_S45_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_43_MP7624_S43_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_44_MP7624_S44_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_45_MP7624_S45_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_46_MP7624_S46_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_46_MP7624_S46_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_49_MP7624_S49_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_47_MP7624_S47_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_49_MP7624_S49_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_4_MP7624_S4_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_51_MP7624_S51_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_4_MP7624_S4_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_48_MP7624_S48_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_51_MP7624_S51_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_50_MP7624_S50_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_50_MP7624_S50_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_52_MP7624_S52_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_53_MP7624_S53_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_52_MP7624_S52_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_53_MP7624_S53_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_54_MP7624_S54_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_54_MP7624_S54_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_55_MP7624_S55_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_55_MP7624_S55_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_56_MP7624_S56_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_56_MP7624_S56_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_57_MP7624_S57_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_57_MP7624_S57_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_58_MP7624_S58_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_58_MP7624_S58_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_59_MP7624_S59_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_59_MP7624_S59_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_5_MP7624_S5_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_5_MP7624_S5_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_60_MP7624_S60_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_60_MP7624_S60_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_61_MP7624_S61_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_61_MP7624_S61_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_62_MP7624_S62_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_62_MP7624_S62_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_63_MP7624_S63_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_63_MP7624_S63_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_64_MP7624_S64_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_64_MP7624_S64_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_65_MP7624_S65_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_65_MP7624_S65_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_66_MP7624_S66_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_66_MP7624_S66_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_67_MP7624_S67_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_68_MP7624_S68_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_68_MP7624_S68_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_67_MP7624_S67_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_69_MP7624_S69_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_6_MP7624_S6_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_69_MP7624_S69_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_6_MP7624_S6_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_7_MP7624_S7_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_7_MP7624_S7_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_8_MP7624_S8_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_8_MP7624_S8_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_9_MP7624_S9_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/RNPv2/20200304_9_MP7624_S9_R2_001.fastq.gz [Content-Type=application/octet-stream]...
\ [138/138 files][240.6 GiB/240.6 GiB] 100% Done                                
Operation completed over 138 objects/240.6 GiB.                                  
In [10]:
! gsutil ls gs://amlproject/
gs://amlproject/MV-4-11.bai
gs://amlproject/MV-4-11.bam
gs://amlproject/Chip/
gs://amlproject/RNA/
gs://amlproject/RNPv2/
In [3]:
sampleset='RNPv3'
In [16]:
terra.uploadFromFolder('amlproject','RNPv2/',
                       'broad-firecloud-ccle/hg38_RNAseq',samplesetname=sampleset,
                      fformat="fastqR1R2", sep='_MP7624')
please be sure you gave access to your terra email account access to this bucket
['RNPv2/20200304_10_MP7624_S10_R1_001.fastq.gz', 'RNPv2/20200304_10_MP7624_S10_R2_001.fastq.gz', 'RNPv2/20200304_11_MP7624_S11_R1_001.fastq.gz', 'RNPv2/20200304_11_MP7624_S11_R2_001.fastq.gz', 'RNPv2/20200304_12_MP7624_S12_R1_001.fastq.gz', 'RNPv2/20200304_12_MP7624_S12_R2_001.fastq.gz', 'RNPv2/20200304_13_MP7624_S13_R1_001.fastq.gz', 'RNPv2/20200304_13_MP7624_S13_R2_001.fastq.gz', 'RNPv2/20200304_14_MP7624_S14_R1_001.fastq.gz', 'RNPv2/20200304_14_MP7624_S14_R2_001.fastq.gz', 'RNPv2/20200304_15_MP7624_S15_R1_001.fastq.gz', 'RNPv2/20200304_15_MP7624_S15_R2_001.fastq.gz', 'RNPv2/20200304_16_MP7624_S16_R1_001.fastq.gz', 'RNPv2/20200304_16_MP7624_S16_R2_001.fastq.gz', 'RNPv2/20200304_17_MP7624_S17_R1_001.fastq.gz', 'RNPv2/20200304_17_MP7624_S17_R2_001.fastq.gz', 'RNPv2/20200304_18_MP7624_S18_R1_001.fastq.gz', 'RNPv2/20200304_18_MP7624_S18_R2_001.fastq.gz', 'RNPv2/20200304_19_MP7624_S19_R1_001.fastq.gz', 'RNPv2/20200304_19_MP7624_S19_R2_001.fastq.gz', 'RNPv2/20200304_1_MP7624_S1_R1_001.fastq.gz', 'RNPv2/20200304_1_MP7624_S1_R2_001.fastq.gz', 'RNPv2/20200304_20_MP7624_S20_R1_001.fastq.gz', 'RNPv2/20200304_20_MP7624_S20_R2_001.fastq.gz', 'RNPv2/20200304_21_MP7624_S21_R1_001.fastq.gz', 'RNPv2/20200304_21_MP7624_S21_R2_001.fastq.gz', 'RNPv2/20200304_22_MP7624_S22_R1_001.fastq.gz', 'RNPv2/20200304_22_MP7624_S22_R2_001.fastq.gz', 'RNPv2/20200304_23_MP7624_S23_R1_001.fastq.gz', 'RNPv2/20200304_23_MP7624_S23_R2_001.fastq.gz', 'RNPv2/20200304_24_MP7624_S24_R1_001.fastq.gz', 'RNPv2/20200304_24_MP7624_S24_R2_001.fastq.gz', 'RNPv2/20200304_25_MP7624_S25_R1_001.fastq.gz', 'RNPv2/20200304_25_MP7624_S25_R2_001.fastq.gz', 'RNPv2/20200304_26_MP7624_S26_R1_001.fastq.gz', 'RNPv2/20200304_26_MP7624_S26_R2_001.fastq.gz', 'RNPv2/20200304_27_MP7624_S27_R1_001.fastq.gz', 'RNPv2/20200304_27_MP7624_S27_R2_001.fastq.gz', 'RNPv2/20200304_28_MP7624_S28_R1_001.fastq.gz', 'RNPv2/20200304_28_MP7624_S28_R2_001.fastq.gz', 'RNPv2/20200304_29_MP7624_S29_R1_001.fastq.gz', 'RNPv2/20200304_29_MP7624_S29_R2_001.fastq.gz', 'RNPv2/20200304_2_MP7624_S2_R1_001.fastq.gz', 'RNPv2/20200304_2_MP7624_S2_R2_001.fastq.gz', 'RNPv2/20200304_30_MP7624_S30_R1_001.fastq.gz', 'RNPv2/20200304_30_MP7624_S30_R2_001.fastq.gz', 'RNPv2/20200304_31_MP7624_S31_R1_001.fastq.gz', 'RNPv2/20200304_31_MP7624_S31_R2_001.fastq.gz', 'RNPv2/20200304_32_MP7624_S32_R1_001.fastq.gz', 'RNPv2/20200304_32_MP7624_S32_R2_001.fastq.gz', 'RNPv2/20200304_33_MP7624_S33_R1_001.fastq.gz', 'RNPv2/20200304_33_MP7624_S33_R2_001.fastq.gz', 'RNPv2/20200304_34_MP7624_S34_R1_001.fastq.gz', 'RNPv2/20200304_34_MP7624_S34_R2_001.fastq.gz', 'RNPv2/20200304_35_MP7624_S35_R1_001.fastq.gz', 'RNPv2/20200304_35_MP7624_S35_R2_001.fastq.gz', 'RNPv2/20200304_36_MP7624_S36_R1_001.fastq.gz', 'RNPv2/20200304_36_MP7624_S36_R2_001.fastq.gz', 'RNPv2/20200304_37_MP7624_S37_R1_001.fastq.gz', 'RNPv2/20200304_37_MP7624_S37_R2_001.fastq.gz', 'RNPv2/20200304_38_MP7624_S38_R1_001.fastq.gz', 'RNPv2/20200304_38_MP7624_S38_R2_001.fastq.gz', 'RNPv2/20200304_39_MP7624_S39_R1_001.fastq.gz', 'RNPv2/20200304_39_MP7624_S39_R2_001.fastq.gz', 'RNPv2/20200304_3_MP7624_S3_R1_001.fastq.gz', 'RNPv2/20200304_3_MP7624_S3_R2_001.fastq.gz', 'RNPv2/20200304_40_MP7624_S40_R1_001.fastq.gz', 'RNPv2/20200304_40_MP7624_S40_R2_001.fastq.gz', 'RNPv2/20200304_41_MP7624_S41_R1_001.fastq.gz', 'RNPv2/20200304_41_MP7624_S41_R2_001.fastq.gz', 'RNPv2/20200304_42_MP7624_S42_R1_001.fastq.gz', 'RNPv2/20200304_42_MP7624_S42_R2_001.fastq.gz', 'RNPv2/20200304_43_MP7624_S43_R1_001.fastq.gz', 'RNPv2/20200304_43_MP7624_S43_R2_001.fastq.gz', 'RNPv2/20200304_44_MP7624_S44_R1_001.fastq.gz', 'RNPv2/20200304_44_MP7624_S44_R2_001.fastq.gz', 'RNPv2/20200304_45_MP7624_S45_R1_001.fastq.gz', 'RNPv2/20200304_45_MP7624_S45_R2_001.fastq.gz', 'RNPv2/20200304_46_MP7624_S46_R1_001.fastq.gz', 'RNPv2/20200304_46_MP7624_S46_R2_001.fastq.gz', 'RNPv2/20200304_47_MP7624_S47_R1_001.fastq.gz', 'RNPv2/20200304_47_MP7624_S47_R2_001.fastq.gz', 'RNPv2/20200304_48_MP7624_S48_R1_001.fastq.gz', 'RNPv2/20200304_48_MP7624_S48_R2_001.fastq.gz', 'RNPv2/20200304_49_MP7624_S49_R1_001.fastq.gz', 'RNPv2/20200304_49_MP7624_S49_R2_001.fastq.gz', 'RNPv2/20200304_4_MP7624_S4_R1_001.fastq.gz', 'RNPv2/20200304_4_MP7624_S4_R2_001.fastq.gz', 'RNPv2/20200304_50_MP7624_S50_R1_001.fastq.gz', 'RNPv2/20200304_50_MP7624_S50_R2_001.fastq.gz', 'RNPv2/20200304_51_MP7624_S51_R1_001.fastq.gz', 'RNPv2/20200304_51_MP7624_S51_R2_001.fastq.gz', 'RNPv2/20200304_52_MP7624_S52_R1_001.fastq.gz', 'RNPv2/20200304_52_MP7624_S52_R2_001.fastq.gz', 'RNPv2/20200304_53_MP7624_S53_R1_001.fastq.gz', 'RNPv2/20200304_53_MP7624_S53_R2_001.fastq.gz', 'RNPv2/20200304_54_MP7624_S54_R1_001.fastq.gz', 'RNPv2/20200304_54_MP7624_S54_R2_001.fastq.gz', 'RNPv2/20200304_55_MP7624_S55_R1_001.fastq.gz', 'RNPv2/20200304_55_MP7624_S55_R2_001.fastq.gz', 'RNPv2/20200304_56_MP7624_S56_R1_001.fastq.gz', 'RNPv2/20200304_56_MP7624_S56_R2_001.fastq.gz', 'RNPv2/20200304_57_MP7624_S57_R1_001.fastq.gz', 'RNPv2/20200304_57_MP7624_S57_R2_001.fastq.gz', 'RNPv2/20200304_58_MP7624_S58_R1_001.fastq.gz', 'RNPv2/20200304_58_MP7624_S58_R2_001.fastq.gz', 'RNPv2/20200304_59_MP7624_S59_R1_001.fastq.gz', 'RNPv2/20200304_59_MP7624_S59_R2_001.fastq.gz', 'RNPv2/20200304_5_MP7624_S5_R1_001.fastq.gz', 'RNPv2/20200304_5_MP7624_S5_R2_001.fastq.gz', 'RNPv2/20200304_60_MP7624_S60_R1_001.fastq.gz', 'RNPv2/20200304_60_MP7624_S60_R2_001.fastq.gz', 'RNPv2/20200304_61_MP7624_S61_R1_001.fastq.gz', 'RNPv2/20200304_61_MP7624_S61_R2_001.fastq.gz', 'RNPv2/20200304_62_MP7624_S62_R1_001.fastq.gz', 'RNPv2/20200304_62_MP7624_S62_R2_001.fastq.gz', 'RNPv2/20200304_63_MP7624_S63_R1_001.fastq.gz', 'RNPv2/20200304_63_MP7624_S63_R2_001.fastq.gz', 'RNPv2/20200304_64_MP7624_S64_R1_001.fastq.gz', 'RNPv2/20200304_64_MP7624_S64_R2_001.fastq.gz', 'RNPv2/20200304_65_MP7624_S65_R1_001.fastq.gz', 'RNPv2/20200304_65_MP7624_S65_R2_001.fastq.gz', 'RNPv2/20200304_66_MP7624_S66_R1_001.fastq.gz', 'RNPv2/20200304_66_MP7624_S66_R2_001.fastq.gz', 'RNPv2/20200304_67_MP7624_S67_R1_001.fastq.gz', 'RNPv2/20200304_67_MP7624_S67_R2_001.fastq.gz', 'RNPv2/20200304_68_MP7624_S68_R1_001.fastq.gz', 'RNPv2/20200304_68_MP7624_S68_R2_001.fastq.gz', 'RNPv2/20200304_69_MP7624_S69_R1_001.fastq.gz', 'RNPv2/20200304_69_MP7624_S69_R2_001.fastq.gz', 'RNPv2/20200304_6_MP7624_S6_R1_001.fastq.gz', 'RNPv2/20200304_6_MP7624_S6_R2_001.fastq.gz', 'RNPv2/20200304_7_MP7624_S7_R1_001.fastq.gz', 'RNPv2/20200304_7_MP7624_S7_R2_001.fastq.gz', 'RNPv2/20200304_8_MP7624_S8_R1_001.fastq.gz', 'RNPv2/20200304_8_MP7624_S8_R2_001.fastq.gz', 'RNPv2/20200304_9_MP7624_S9_R1_001.fastq.gz', 'RNPv2/20200304_9_MP7624_S9_R2_001.fastq.gz']
> /home/jeremie/JKBio/TerraFunction.py(227)uploadFromFolder()
    226     ipdb.set_trace()
--> 227     df = pd.DataFrame(data)
    228     print(df)

ipdb> c
      sample_id                                             fastq1  \
0   20200304_10  gs://amlproject/RNPv2/20200304_10_MP7624_S10_R...   
1   20200304_11  gs://amlproject/RNPv2/20200304_11_MP7624_S11_R...   
2   20200304_12  gs://amlproject/RNPv2/20200304_12_MP7624_S12_R...   
3   20200304_13  gs://amlproject/RNPv2/20200304_13_MP7624_S13_R...   
4   20200304_14  gs://amlproject/RNPv2/20200304_14_MP7624_S14_R...   
..          ...                                                ...   
64  20200304_69  gs://amlproject/RNPv2/20200304_69_MP7624_S69_R...   
65   20200304_6  gs://amlproject/RNPv2/20200304_6_MP7624_S6_R1_...   
66   20200304_7  gs://amlproject/RNPv2/20200304_7_MP7624_S7_R1_...   
67   20200304_8  gs://amlproject/RNPv2/20200304_8_MP7624_S8_R1_...   
68   20200304_9  gs://amlproject/RNPv2/20200304_9_MP7624_S9_R1_...   

                                               fastq2  
0   gs://amlproject/RNPv2/20200304_10_MP7624_S10_R...  
1   gs://amlproject/RNPv2/20200304_11_MP7624_S11_R...  
2   gs://amlproject/RNPv2/20200304_12_MP7624_S12_R...  
3   gs://amlproject/RNPv2/20200304_13_MP7624_S13_R...  
4   gs://amlproject/RNPv2/20200304_14_MP7624_S14_R...  
..                                                ...  
64  gs://amlproject/RNPv2/20200304_69_MP7624_S69_R...  
65  gs://amlproject/RNPv2/20200304_6_MP7624_S6_R2_...  
66  gs://amlproject/RNPv2/20200304_7_MP7624_S7_R2_...  
67  gs://amlproject/RNPv2/20200304_8_MP7624_S8_R2_...  
68  gs://amlproject/RNPv2/20200304_9_MP7624_S9_R2_...  

[69 rows x 3 columns]
Successfully imported 69 participants.
Successfully imported 69 samples.
Successfully imported 1 sample sets:
  * MAX_AML_RNPv2 (69 samples)

Processing

In [4]:
wm = dm.WorkspaceManager('broad-firecloud-ccle/hg38_RNAseq')
In [19]:
submission_id = wm.create_submission("star_v1-0_BETA_cfg", sampleset, 'sample_set',expression='this.samples')
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission 2ad41571-b46e-4c3b-be51-44e800717d2a.
In [4]:
submission_id = wm.create_submission("rsem_v1-0_BETA_cfg", 
                                      sampleset,'sample_set',expression='this.samples')
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission cfd65243-2093-4007-9b21-c5b09c9fc875.
1tatus is: Failed for 0 jobs in submission 0. 2 mn elapsed.
10
11
12
13
14
15
16
17
18
19
2
20
21
22
23
24
25
26
27
28
29
3
30
31
32
33
34
35
36
37
38
39
4
40
41
42
43
44
45
46
47
48
49
5
50
51
52
53
54
55
56
57
58
59
6
60
61
62
63
64
65
66
67
68
69
7
70
71
72
73
8
9
0.0 of jobs Succeeded in submission 0.
-----------------------------------------------
RuntimeError  Traceback (most recent call last)
<ipython-input-4-50c8187cd693> in <module>
      1 submission_id = wm.create_submission("rsem_v1-0_BETA_cfg", 
      2                                       sampleset,'sample_set',expression='this.samples')
----> 3 terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)

~/JKBio/TerraFunction.py in waitForSubmission(workspace, submissions, raise_errors)
     93         print(str(done / (done + failed)) + " of jobs Succeeded in submission " + str(scount) + ".")
     94   if len(failed_submission) > 0 and raise_errors:
---> 95     raise RuntimeError(str(len(failed_submission)) + " failed submission")
     96   return failed_submission
     97   # print and return well formated data

RuntimeError: 73 failed submission
In [5]:
submission_id = wm.create_submission("rsem_aggregate_results_v1-0_BETA_cfg", 
                                         sampleset)
terra.waitForSubmission('broad-firecloud-ccle/hg38_RNAseq', submission_id)
Successfully created submission 9be600dc-4db0-4af1-b607-503800cc45fc.
1.0 of jobs Succeeded in submission 0.sion 0. 210 mn elapsed.
Out[5]:
[]
In [5]:
results = wm.get_sample_sets().loc[sampleset]
rsem_genes_expected_count = results['rsem_genes_expected_count']
In [7]:
results
Out[7]:
samples                            [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1, 20...
rsem_transcripts_isopct            gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba...
rsem_transcripts_tpm               gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba...
rsem_transcripts_expected_count    gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba...
rsem_genes_tpm                     gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba...
rsem_genes_expected_count          gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcba...
Name: RNPv3, dtype: object
In [8]:
mkdir ../../data/RNPv3

Loading

In [9]:
! gsutil cp $rsem_genes_expected_count ../../data/RNPv3/
Copying gs://fc-secure-163bcce1-14a1-4cc2-b8f8-ec8bcbabe2da/9be600dc-4db0-4af1-b607-503800cc45fc/rsem_aggregate_results_workflow/abca308c-59a2-4ad5-8c87-9e4bdf407411/call-rsem_aggregate_results/RNPv3.rsem_genes_expected_count.txt.gz...
/ [1 files][  4.6 MiB/  4.6 MiB]                                                
Operation completed over 1 objects/4.6 MiB.                                      
In [6]:
file = '../../data/RNPv3/'+rsem_genes_expected_count.split('/')[-1]
In [11]:
file
Out[11]:
'../../data/RNPv3/RNPv3.rsem_genes_expected_count.txt.gz'
In [12]:
! gunzip $file
In [7]:
file
Out[7]:
'../../data/RNPv3/RNPv3.rsem_genes_expected_count.txt.gz'
In [8]:
rsem_genes_expected_count = pd.read_csv(file[:-3], sep='\t')
In [2]:
rsem_genes_expected_count = pd.read_csv("../../data/RNPv3/RNPv3.rsem_genes_expected_count.txt", sep='\t')
In [3]:
data = rsem_genes_expected_count.drop("transcript_id(s)",1)
In [4]:
data["gene_id"] = h.convertGenes(data['gene_id'])[0]
you need access to taiga for this (https://pypi.org/project/taigapy/)
20702 could not be parsed... we don't have all genes already
In [5]:
data=data.set_index('gene_id')
In [6]:
data
Out[6]:
1 10 11 12 13 14 15 16 17 18 ... 67 68 69 7 70 71 72 73 8 9
gene_id
TSPAN6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
TNMD 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
DPM1 1619.00 2465.00 1701.00 1535.00 1863.00 2093.00 2027.00 2202.00 2148.00 2235.00 ... 1620.00 1840.00 1729.00 1983.00 1926.0 1846.00 1915.00 2633.00 2451.00 2378.00
SCYL3 464.57 846.12 672.69 603.75 577.41 617.97 601.43 545.49 575.14 536.97 ... 430.78 460.04 437.36 542.42 572.5 507.48 580.49 713.56 670.02 576.38
C1orf112 780.43 1031.90 755.31 676.25 1232.70 1209.00 1309.60 1370.50 1245.90 1257.10 ... 949.22 1277.00 1032.60 1163.60 783.5 1088.50 1184.50 1572.40 1481.00 1332.90
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ERCC-00164 3.00 5.00 8.00 2.00 2.00 1.00 2.00 1.00 3.00 3.00 ... 1.00 1.00 5.00 1.00 6.0 3.00 3.00 4.00 2.00 4.00
ERCC-00165 215.00 594.00 424.00 509.00 136.00 88.00 165.00 258.00 161.00 163.00 ... 93.00 139.00 87.00 127.00 628.0 207.00 151.00 241.00 187.00 176.00
ERCC-00168 3.00 12.00 9.00 8.00 0.00 8.00 0.00 5.00 5.00 1.00 ... 3.00 4.00 1.00 3.00 8.0 5.00 4.00 7.00 8.00 3.00
ERCC-00170 66.00 205.00 133.00 211.00 57.00 40.00 73.00 94.00 42.00 40.00 ... 41.00 56.00 33.00 50.00 141.0 72.00 92.00 110.00 89.00 88.00
ERCC-00171 13554.00 40900.00 29090.00 33242.00 10039.00 6399.00 10836.00 15684.00 9526.00 8893.00 ... 7058.00 7576.00 5882.00 8381.00 47913.0 12046.00 10447.00 17316.00 10492.00 12389.00

58813 rows × 73 columns

In [7]:
rename = {"1": "mr120-MV411-RNP_IRF2BP2-r4",
"2": "mr121-MV411-RNP_IRF2BP2-r5",
"3": "mr122-MV411-RNP_IRF2BP2-r6",
"4": "mr123-MV411-RNP_IRF8-r4",
"5": "mr124-MV411-RNP_IRF8-r5",
"6": "mr125-MV411-RNP_IRF8-r6",
"7": "mr126-MV411-RNP_MEF2D-r4",
"8": "mr127-MV411-RNP_MEF2D-r5",
"9": "mr128-MV411-RNP_MEF2D-r6",
"10": "mr129-MV411-RNP_MYC-r4",
"11": "mr130-MV411-RNP_MYC-r5",
"12": "mr131-MV411-RNP_MYC-r6",
"13": "mr132-MV411-RNP_RUNX1-r4",
"14": "mr133-MV411-RNP_RUNX1-r5",
"15": "mr134-MV411-RNP_RUNX1-r6",
"16": "mr135-MV411-RNP_RUNX2-r4",
"17": "mr136-MV411-RNP_RUNX2-r5",
"18": "mr137-MV411-RNP_RUNX2-r6",
"19": "mr138-MV411-RNP_SPI1-r4",
"20": "mr139-MV411-RNP_SPI1-r5",
"21": "mr140-MV411-RNP_SPI1-r6",
"22": "mr141-MV411-RNP_ZMYND8-r4",
"23": "mr142-MV411-RNP_ZMYND8-r5",
"24": "mr143-MV411-RNP_ZMYND8-r6",
"25": "mr144-MV411-RNP_LMO2-r4",
"26": "mr145-MV411-RNP_LMO2-r5",
"27": "mr146-MV411-RNP_LMO2-r6",
"28": "mr147-MV411-RNP_LYL1-r4",
"29": "mr148-MV411-RNP_LYL1-r5",
"30": "mr149-MV411-RNP_LYL1-r6",
"31": "mr150-MV411-RNP_MAX-r4",
"32": "mr151-MV411-RNP_MAX-r5",
"33": "mr152-MV411-RNP_MAX-r6",
"34": "mr153-MV411-RNP_ZEB2-r4",
"35": "mr154-MV411-RNP_ZEB2-r5",
"36": "mr155-MV411-RNP_ZEB2-r6",
"37": "mr156-MV411-RNP_MEF2C-r4",
"38": "mr157-MV411-RNP_MEF2C-r5",
"39": "mr158-MV411-RNP_MEF2C-r6",
"40": "mr159-MV411-RNP_MEIS1-r4",
"41": "mr160-MV411-RNP_MEIS1-r5",
"42": "mr161-MV411-RNP_MEIS1-r6",
"43": "mr162-MV411-RNP_FLI1-r4",
"44": "mr163-MV411-RNP_FLI1-r5",
"45": "mr164-MV411-RNP_FLI1-r6",
"46": "mr165-MV411-RNP_ELF2-r4",
"47": "mr166-MV411-RNP_ELF2-r5",
"48": "mr167-MV411-RNP_ELF2-r6",
"49": "mr168-MV411-RNP_GFI1-r4",
"50": "mr169-MV411-RNP_GFI1-r5",
"51": "mr170-MV411-RNP_GFI1-r6",
"52": "mr171-MV411-RNP_IKZF1-r4",
"53": "mr172-MV411-RNP_IKZF1-r5",
"54": "mr173-MV411-RNP_IKZF1-r6",
"55": "mr174-MV411-RNP_CEBPA-r4",
"56": "mr175-MV411-RNP_CEBPA-r5",
"57": "mr176-MV411-RNP_CEBPA-r6",
"58": "mr177-MV411-RNP_MYB-r4",
"59": "mr178-MV411-RNP_MYB-r5",
"60": "mr179-MV411-RNP_MYB-r6",
"61": "mr180-MV411-RNP_MYBL2-r1",
"62": "mr181-MV411-RNP_MYBL2-r2",
"63": "mr182-MV411-RNP_MYBL2-r3",
"64": "mr183-MV411-RNP_HOXA9-r4",
"65": "mr184-MV411-RNP_HOXA9-r5",
"66": "mr185-MV411-RNP_HOXA9-r6",
"67": "mr186-MV411-RNP_AAVS1-r1",
"68": "mr187-MV411-RNP_AAVS1-r2",
"69": "mr188-MV411-RNP_AAVS1-r3",
"70": "mr189-MV411-RNP_SP1-r4",
"71": "mr190-MV411-RNP_SP1-r5",
"72": "mr191-MV411-RNP_SP1-r6",
"73": "mr192-MV411-RNP_SP1-r7"}
In [8]:
data.columns
Out[8]:
Index(['1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '2',
       '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '3', '30',
       '31', '32', '33', '34', '35', '36', '37', '38', '39', '4', '40', '41',
       '42', '43', '44', '45', '46', '47', '48', '49', '5', '50', '51', '52',
       '53', '54', '55', '56', '57', '58', '59', '6', '60', '61', '62', '63',
       '64', '65', '66', '67', '68', '69', '7', '70', '71', '72', '73', '8',
       '9'],
      dtype='object')
In [9]:
data.columns = [rename[i] for i in data.columns]
In [10]:
data
Out[10]:
mr120-MV411-RNP_IRF2BP2-r4 mr129-MV411-RNP_MYC-r4 mr130-MV411-RNP_MYC-r5 mr131-MV411-RNP_MYC-r6 mr132-MV411-RNP_RUNX1-r4 mr133-MV411-RNP_RUNX1-r5 mr134-MV411-RNP_RUNX1-r6 mr135-MV411-RNP_RUNX2-r4 mr136-MV411-RNP_RUNX2-r5 mr137-MV411-RNP_RUNX2-r6 ... mr186-MV411-RNP_AAVS1-r1 mr187-MV411-RNP_AAVS1-r2 mr188-MV411-RNP_AAVS1-r3 mr126-MV411-RNP_MEF2D-r4 mr189-MV411-RNP_SP1-r4 mr190-MV411-RNP_SP1-r5 mr191-MV411-RNP_SP1-r6 mr192-MV411-RNP_SP1-r7 mr127-MV411-RNP_MEF2D-r5 mr128-MV411-RNP_MEF2D-r6
gene_id
TSPAN6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
TNMD 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
DPM1 1619.00 2465.00 1701.00 1535.00 1863.00 2093.00 2027.00 2202.00 2148.00 2235.00 ... 1620.00 1840.00 1729.00 1983.00 1926.0 1846.00 1915.00 2633.00 2451.00 2378.00
SCYL3 464.57 846.12 672.69 603.75 577.41 617.97 601.43 545.49 575.14 536.97 ... 430.78 460.04 437.36 542.42 572.5 507.48 580.49 713.56 670.02 576.38
C1orf112 780.43 1031.90 755.31 676.25 1232.70 1209.00 1309.60 1370.50 1245.90 1257.10 ... 949.22 1277.00 1032.60 1163.60 783.5 1088.50 1184.50 1572.40 1481.00 1332.90
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ERCC-00164 3.00 5.00 8.00 2.00 2.00 1.00 2.00 1.00 3.00 3.00 ... 1.00 1.00 5.00 1.00 6.0 3.00 3.00 4.00 2.00 4.00
ERCC-00165 215.00 594.00 424.00 509.00 136.00 88.00 165.00 258.00 161.00 163.00 ... 93.00 139.00 87.00 127.00 628.0 207.00 151.00 241.00 187.00 176.00
ERCC-00168 3.00 12.00 9.00 8.00 0.00 8.00 0.00 5.00 5.00 1.00 ... 3.00 4.00 1.00 3.00 8.0 5.00 4.00 7.00 8.00 3.00
ERCC-00170 66.00 205.00 133.00 211.00 57.00 40.00 73.00 94.00 42.00 40.00 ... 41.00 56.00 33.00 50.00 141.0 72.00 92.00 110.00 89.00 88.00
ERCC-00171 13554.00 40900.00 29090.00 33242.00 10039.00 6399.00 10836.00 15684.00 9526.00 8893.00 ... 7058.00 7576.00 5882.00 8381.00 47913.0 12046.00 10447.00 17316.00 10492.00 12389.00

58813 rows × 73 columns

post processing and filtering

filter some more

In [11]:
toremove = np.argwhere(data.values.var(1)==0)
toremove.ravel()
Out[11]:
array([    1,    15,    24, ..., 58714, 58715, 58718])
In [12]:
toremove.shape
Out[12]:
(19991, 1)
In [13]:
data = data.drop(data.iloc[toremove.ravel()].index,0)
In [14]:
data.shape
Out[14]:
(38787, 73)
In [15]:
ERCC = data[~data.index.str.contains('ENSG00')]
In [16]:
ensg = data[data.index.str.contains('ENSG00')]
In [17]:
data = data[~data.index.str.contains('ENSG00')]

renormalize the data

In [18]:
len(ERCC)
Out[18]:
26672
In [19]:
ERCC
Out[19]:
mr120-MV411-RNP_IRF2BP2-r4 mr129-MV411-RNP_MYC-r4 mr130-MV411-RNP_MYC-r5 mr131-MV411-RNP_MYC-r6 mr132-MV411-RNP_RUNX1-r4 mr133-MV411-RNP_RUNX1-r5 mr134-MV411-RNP_RUNX1-r6 mr135-MV411-RNP_RUNX2-r4 mr136-MV411-RNP_RUNX2-r5 mr137-MV411-RNP_RUNX2-r6 ... mr186-MV411-RNP_AAVS1-r1 mr187-MV411-RNP_AAVS1-r2 mr188-MV411-RNP_AAVS1-r3 mr126-MV411-RNP_MEF2D-r4 mr189-MV411-RNP_SP1-r4 mr190-MV411-RNP_SP1-r5 mr191-MV411-RNP_SP1-r6 mr192-MV411-RNP_SP1-r7 mr127-MV411-RNP_MEF2D-r5 mr128-MV411-RNP_MEF2D-r6
gene_id
TSPAN6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
DPM1 1619.00 2465.00 1701.00 1535.00 1863.00 2093.00 2027.00 2202.00 2148.00 2235.00 ... 1620.00 1840.00 1729.00 1983.00 1926.0 1846.00 1915.00 2633.00 2451.00 2378.00
SCYL3 464.57 846.12 672.69 603.75 577.41 617.97 601.43 545.49 575.14 536.97 ... 430.78 460.04 437.36 542.42 572.5 507.48 580.49 713.56 670.02 576.38
C1orf112 780.43 1031.90 755.31 676.25 1232.70 1209.00 1309.60 1370.50 1245.90 1257.10 ... 949.22 1277.00 1032.60 1163.60 783.5 1088.50 1184.50 1572.40 1481.00 1332.90
FGR 1443.00 8556.00 6387.00 5955.00 2359.00 2615.00 2258.00 3340.00 3229.00 3466.00 ... 2323.00 2401.00 2230.00 3680.00 2016.0 2285.00 2384.00 3106.00 4706.00 4308.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ERCC-00164 3.00 5.00 8.00 2.00 2.00 1.00 2.00 1.00 3.00 3.00 ... 1.00 1.00 5.00 1.00 6.0 3.00 3.00 4.00 2.00 4.00
ERCC-00165 215.00 594.00 424.00 509.00 136.00 88.00 165.00 258.00 161.00 163.00 ... 93.00 139.00 87.00 127.00 628.0 207.00 151.00 241.00 187.00 176.00
ERCC-00168 3.00 12.00 9.00 8.00 0.00 8.00 0.00 5.00 5.00 1.00 ... 3.00 4.00 1.00 3.00 8.0 5.00 4.00 7.00 8.00 3.00
ERCC-00170 66.00 205.00 133.00 211.00 57.00 40.00 73.00 94.00 42.00 40.00 ... 41.00 56.00 33.00 50.00 141.0 72.00 92.00 110.00 89.00 88.00
ERCC-00171 13554.00 40900.00 29090.00 33242.00 10039.00 6399.00 10836.00 15684.00 9526.00 8893.00 ... 7058.00 7576.00 5882.00 8381.00 47913.0 12046.00 10447.00 17316.00 10492.00 12389.00

26672 rows × 73 columns

Loading the CRC members

In [20]:
ctf=pd.read_csv('../data/CTF.csv',header=None)[0].values.tolist()
ctf
Out[20]:
['ARID2',
 'CEBPA',
 'CEBPE',
 'E2F3',
 'FLI1',
 'FOSL2',
 'GFI1',
 'GFI1B',
 'HHEX',
 'IRF8',
 'LYL1',
 'MEF2C',
 'MEF2D',
 'MEIS1',
 'MTF1',
 'MYB',
 'MYC',
 'PLAGL2',
 'RUNX1',
 'RUNX2',
 'RXRA',
 'SETDB1',
 'SNAPC5',
 'SP1',
 'SPI1',
 'SREBF1',
 'STAT5B',
 'TERF2',
 'TFAP4',
 'ZEB2',
 'ZFPM1',
 'ZMYND8',
 'LMO2',
 'MAX',
 'ELF2',
 'ETV6',
 'HOXA9',
 'GATA2']

Making and running the dashboard

In [21]:
%%R
library('erccdashboard')
R[write to console]: Loading required package: ggplot2

R[write to console]: Loading required package: gridExtra

R[write to console]: 
Attaching package: ‘gridExtra’


R[write to console]: The following object is masked from ‘package:Biobase’:

    combine


R[write to console]: The following object is masked from ‘package:BiocGenerics’:

    combine


In [22]:
ERCC = ERCC.astype(int)
In [23]:
ERCC['Feature'] = ERCC.index
In [50]:
sns.heatmap(np.log2(ERCC[ERCC.index.str.contains('ERCC-')][['mr186-MV411-RNP_AAVS1-r1', 'mr187-MV411-RNP_AAVS1-r2', 'mr188-MV411-RNP_AAVS1-r3','mr129-MV411-RNP_MYC-r4', 'mr189-MV411-RNP_SP1-r4', 'mr120-MV411-RNP_IRF2BP2-r4']].values / ERCC[ERCC.index.str.contains('ERCC-')][['mr186-MV411-RNP_AAVS1-r1', 'mr187-MV411-RNP_AAVS1-r2', 'mr188-MV411-RNP_AAVS1-r3','mr129-MV411-RNP_MYC-r4', 'mr189-MV411-RNP_SP1-r4', 'mr120-MV411-RNP_IRF2BP2-r4']].values.mean(0)+1))
Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f702dfab190>
In [24]:
experiments = list(set([i.split('-')[2] for i in ERCC.columns[:-1]]))
experiments.remove("RNP_AAVS1")
In [25]:
#TODO: compute the mass from concentration
###################################################
### code chunk number 3: defineInputData
###################################################
%R datType = "count" # "count" for RNA-Seq data, "array" for microarray data
%R isNorm = F # flag to indicate if input expression measures are already normalized, default is FALSE 
%R filenameRoot = "RNPv2" # user defined filename prefix for results files
%R sample2Name = "AAAVS1" # name for sample 2 in the experiment
%R erccmix = "Single" # name of ERCC mixture design, "RatioPair" is default
%R erccdilution = 1/100 # dilution factor used for Ambion spike-in mixtures
%R spikeVol = 1 # volume (in microliters) of diluted spike-in mixture added to total RNA mass
%R choseFDR = 0.1 # user defined false discovery rate (FDR), default is 0.05
Out[25]:
array([0.1])
In [26]:
cols = list(ERCC.columns)
cols.sort()
res={}
for val in experiments:
    d = {}
    e=0
    d.update({
        'Feature':'Feature'
    })
    for i in cols[:-1]:
        if val+'-' in i:
            e+=1
            d.update({i: val.split('_')[-1]+'_'+str(e)})
    d.update({
        'mr186-MV411-RNP_AAVS1-r1': 'AAAVS1_1',
        'mr187-MV411-RNP_AAVS1-r2': 'AAAVS1_2',
        'mr188-MV411-RNP_AAVS1-r3': 'AAAVS1_3'
    })
    a = ERCC[list(d.keys())].rename(columns=d)
    a.to_csv('../data/ERCC_estimation.csv', index=None)
    val = val.split('_')[-1]
    
    torm = 'RNPv2.'+val+'.AAAVS1.All.Pvals.csv'
    ! rm $torm 
    %R -i val print(val)
    %R print(sample2Name)
    %R a <- read.csv('../data/ERCC_estimation.csv')
    %R print(head(a))
    %R exDat = ''
    %R totalRNAmass <- 0.5
    try:
        %R -i val exDat = initDat(datType = datType, isNorm = isNorm, exTable = a, filenameRoot = filenameRoot, sample1Name = val, sample2Name = sample2Name, erccmix = erccmix, erccdilution = erccdilution, spikeVol = spikeVol, totalRNAmass = totalRNAmass, choseFDR = choseFDR)
        %R exDat = est_r_m(exDat)
        %R exDat = dynRangePlot(exDat)
    except Warning:
        print("failed for "+val)
        continue
    except:
        print('worked for '+val)
    %R print(summary(exDat))
    %R grid.arrange(exDat$Figures$dynRangePlot)
    %R grid.arrange(exDat$Figures$r_mPlot)
    %R grid.arrange(exDat$Figures$rangeResidPlot)
    %R -o rm rm <- exDat$Results$r_m.res$r_m.mn
    %R -o se se <- exDat$Results$r_m.res$r_m.mnse
    res[val] = (rm[0],se[0])
rm: cannot remove 'RNPv2.RUNX1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "RUNX1"
[1] "AAAVS1"
   Feature RUNX1_1 RUNX1_2 RUNX1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1863    2093    2027     1620     1840     1729
3    SCYL3     577     617     601      430      460      437
4 C1orf112    1232    1209    1309      949     1277     1032
5      FGR    2359    2615    2258     2323     2401     2230
6      CFH       8       9       7        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.RUNX1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17128 transcripts remain for  analysis.
A total of 22 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00137 ERCC-00138
ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2247.25 2328 2294.25 1629 1895 1703
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
70 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.01362734 

GLM log(r_m) estimate weighted s.e.:
0.1356891 

Number of ERCCs in Mix 1 dyn range:  70 

Number of ERCCs in Mix 2 dyn range:  70 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00031 ERCC-00097 ERCC-00120 ERCC-00168 ERCC-00073


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.MYC.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MYC"
[1] "AAAVS1"
   Feature MYC_1 MYC_2 MYC_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6     0     0     0        0        0        0
2     DPM1  2465  1701  1535     1620     1840     1729
3    SCYL3   846   672   603      430      460      437
4 C1orf112  1031   755   676      949     1277     1032
5      FGR  8556  6387  5955     2323     2401     2230
6      CFH     5     1     2        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.MYC.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17015 transcripts remain for  analysis.
A total of 11 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057
ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00098 ERCC-00117
ERCC-00142

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2374 1836.5 1790.5 1643 1913.5 1714
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
81 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
-1.430577 

GLM log(r_m) estimate weighted s.e.:
0.1054966 

Number of ERCCs in Mix 1 dyn range:  81 

Number of ERCCs in Mix 2 dyn range:  81 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00041 ERCC-00017 ERCC-00073 ERCC-00081 ERCC-00086
ERCC-00104 ERCC-00109 ERCC-00123 ERCC-00134 ERCC-00137
ERCC-00138 ERCC-00156


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.RUNX2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "RUNX2"
[1] "AAAVS1"
   Feature RUNX2_1 RUNX2_2 RUNX2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    2202    2148    2235     1620     1840     1729
3    SCYL3     545     575     536      430      460      437
4 C1orf112    1370    1245    1257      949     1277     1032
5      FGR    3340    3229    3466     2323     2401     2230
6      CFH      16      12      14        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.RUNX2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17042 transcripts remain for  analysis.
A total of 20 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00109 ERCC-00117
ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2369.75 2268 2240.75 1638.75 1908.5 1710.75
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
72 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
-0.2177111 

GLM log(r_m) estimate weighted s.e.:
0.1235403 

Number of ERCCs in Mix 1 dyn range:  72 

Number of ERCCs in Mix 2 dyn range:  72 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00073 ERCC-00097 ERCC-00134 ERCC-00104


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.LYL1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "LYL1"
[1] "AAAVS1"
   Feature LYL1_1 LYL1_2 LYL1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   1954   1656   2061     1620     1840     1729
3    SCYL3    572    428    588      430      460      437
4 C1orf112   1241    952   1107      949     1277     1032
5      FGR   2786   2397   3052     2323     2401     2230
6      CFH      7     14     13        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.LYL1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16829 transcripts remain for  analysis.
A total of 20 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2302 1853 2252 1669 1951 1743
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
72 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00144

GLM log(r_m) estimate:
0.1154768 

GLM log(r_m) estimate weighted s.e.:
0.09762555 

Number of ERCCs in Mix 1 dyn range:  72 

Number of ERCCs in Mix 2 dyn range:  72 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00097 ERCC-00134 ERCC-00168 ERCC-00073 ERCC-00123


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.IKZF1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "IKZF1"
[1] "AAAVS1"
   Feature IKZF1_1 IKZF1_2 IKZF1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1299    1529    2015     1620     1840     1729
3    SCYL3     361     406     571      430      460      437
4 C1orf112     836     967    1213      949     1277     1032
5      FGR    2082    1867    3154     2323     2401     2230
6      CFH       4       6       5        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.IKZF1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16752 transcripts remain for  analysis.
A total of 22 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024
ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075
ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104
ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138
ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1576.25 1526 2348.25 1677 1966.25 1753
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
70 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.2648816 

GLM log(r_m) estimate weighted s.e.:
0.1193648 

Number of ERCCs in Mix 1 dyn range:  70 

Number of ERCCs in Mix 2 dyn range:  70 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00031 ERCC-00040 ERCC-00073 ERCC-00097 ERCC-00134
ERCC-00158 ERCC-00164 ERCC-00168


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.MEIS1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MEIS1"
[1] "AAAVS1"
   Feature MEIS1_1 MEIS1_2 MEIS1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1916    2046    2726     1620     1840     1729
3    SCYL3     477     554     683      430      460      437
4 C1orf112    1121    1128    1408      949     1277     1032
5      FGR    1935    2193    2556     2323     2401     2230
6      CFH       7       3      12        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.MEIS1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16907 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00117
ERCC-00123 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2115 2194 2639.5 1658 1938 1730
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.2082356 

GLM log(r_m) estimate weighted s.e.:
0.1646045 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00097 ERCC-00164 ERCC-00168 ERCC-00073 ERCC-00109


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.FLI1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "FLI1"
[1] "AAAVS1"
   Feature FLI1_1 FLI1_2 FLI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   1892   2087   2588     1620     1840     1729
3    SCYL3    450    555    668      430      460      437
4 C1orf112   1196   1338   1591      949     1277     1032
5      FGR   2480   2602   3360     2323     2401     2230
6      CFH      3      3      4        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.FLI1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16821 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2055 2218 2616 1669 1953 1743
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00039 ERCC-00019

GLM log(r_m) estimate:
0.2669788 

GLM log(r_m) estimate weighted s.e.:
0.08613995 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00013 ERCC-00097 ERCC-00120 ERCC-00134 ERCC-00164
ERCC-00073


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.ELF2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "ELF2"
[1] "AAAVS1"
   Feature ELF2_1 ELF2_2 ELF2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   2516   1913   1971     1620     1840     1729
3    SCYL3    640    486    584      430      460      437
4 C1orf112   1315   1056   1278      949     1277     1032
5      FGR   3206   2242   2711     2323     2401     2230
6      CFH      4      8      5        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.ELF2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16904 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2414.75 1863 2194 1658.5 1938.5 1731
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.1883588 

GLM log(r_m) estimate weighted s.e.:
0.1001319 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00040 ERCC-00073 ERCC-00120 ERCC-00123 ERCC-00164


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.MYBL2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MYBL2"
[1] "AAAVS1"
   Feature MYBL2_1 MYBL2_2 MYBL2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1881    3921    1347     1620     1840     1729
3    SCYL3     469    1039     389      430      460      437
4 C1orf112    1108    2192     863      949     1277     1032
5      FGR    2573    5804    2117     2323     2401     2230
6      CFH      18      18       8        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.MYBL2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17053 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1865 3829 1543 1638 1906 1710
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00039

GLM log(r_m) estimate:
0.4145379 

GLM log(r_m) estimate weighted s.e.:
0.109987 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00013 ERCC-00031 ERCC-00073 ERCC-00077 ERCC-00097
ERCC-00120 ERCC-00134 ERCC-00147 ERCC-00158 ERCC-00168


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.IRF2BP2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "IRF2BP2"
[1] "AAAVS1"
   Feature IRF2BP2_1 IRF2BP2_2 IRF2BP2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6         0         0         0        0        0        0
2     DPM1      1619      1938      2043     1620     1840     1729
3    SCYL3       464       545       564      430      460      437
4 C1orf112       780       776       908      949     1277     1032
5      FGR      1443      1587      1765     2323     2401     2230
6      CFH         3         5        15        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.IRF2BP2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16582 transcripts remain for  analysis.
A total of 13 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057
ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00086 ERCC-00098
ERCC-00117 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1614.75 1750.75 2094 1704 1995 1776
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
79 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
-1.242144 

GLM log(r_m) estimate weighted s.e.:
0.2116787 

Number of ERCCs in Mix 1 dyn range:  79 

Number of ERCCs in Mix 2 dyn range:  79 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00041 ERCC-00138 ERCC-00017 ERCC-00073 ERCC-00081
ERCC-00104 ERCC-00109 ERCC-00123 ERCC-00134 ERCC-00137


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.ZEB2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "ZEB2"
[1] "AAAVS1"
   Feature ZEB2_1 ZEB2_2 ZEB2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   2361   2261   1810     1620     1840     1729
3    SCYL3    531    527    481      430      460      437
4 C1orf112   1086   1059    945      949     1277     1032
5      FGR   2523   2566   2552     2323     2401     2230
6      CFH      1      1      0        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.ZEB2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16877 transcripts remain for  analysis.
A total of 19 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00138 ERCC-00142

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2103 2164 2008 1663 1944 1734
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
73 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
-0.1701759 

GLM log(r_m) estimate weighted s.e.:
0.1445402 

Number of ERCCs in Mix 1 dyn range:  73 

Number of ERCCs in Mix 2 dyn range:  73 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00031 ERCC-00041 ERCC-00097 ERCC-00120 ERCC-00156
ERCC-00158 ERCC-00164 ERCC-00073 ERCC-00134 ERCC-00137


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.MYB.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MYB"
[1] "AAAVS1"
   Feature MYB_1 MYB_2 MYB_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6     0     0     0        0        0        0
2     DPM1  1695  1557  1288     1620     1840     1729
3    SCYL3   582   482   460      430      460      437
4 C1orf112   831   825   776      949     1277     1032
5      FGR  3674  3220  2807     2323     2401     2230
6      CFH    10    17    11        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.MYB.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16938 transcripts remain for  analysis.
A total of 19 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00109 ERCC-00117
ERCC-00137 ERCC-00138 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1929.75 1799 1536 1654.75 1933 1726.75
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
73 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00039 ERCC-00144

GLM log(r_m) estimate:
-0.5666497 

GLM log(r_m) estimate weighted s.e.:
0.1645544 

Number of ERCCs in Mix 1 dyn range:  73 

Number of ERCCs in Mix 2 dyn range:  73 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00040 ERCC-00097 ERCC-00104 ERCC-00120 ERCC-00123
ERCC-00134 ERCC-00164 ERCC-00168 ERCC-00073


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.HOXA9.AAAVS1.All.Pvals.csv': No such file or directory
[1] "HOXA9"
[1] "AAAVS1"
   Feature HOXA9_1 HOXA9_2 HOXA9_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1842    2075    2081     1620     1840     1729
3    SCYL3     516     575     602      430      460      437
4 C1orf112    1174    1241    1190      949     1277     1032
5      FGR    2239    2364    2372     2323     2401     2230
6      CFH       4      10       8        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.HOXA9.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16777 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2114 2247 2145 1675 1962 1750
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.4186265 

GLM log(r_m) estimate weighted s.e.:
0.1449086 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00040 ERCC-00073 ERCC-00097 ERCC-00120 ERCC-00134
ERCC-00147 ERCC-00164


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.SP1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "SP1"
[1] "AAAVS1"
   Feature SP1_1 SP1_2 SP1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6     0     0     0        0        0        0
2     DPM1  1926  1846  1915     1620     1840     1729
3    SCYL3   572   507   580      430      460      437
4 C1orf112   783  1088  1184      949     1277     1032
5      FGR  2016  2285  2384     2323     2401     2230
6      CFH    15    13    15        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.SP1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16820 transcripts remain for  analysis.
A total of 13 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00083 ERCC-00098
ERCC-00117 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2120.5 1938.25 2252 1669.25 1953.25 1743.25
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
79 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
-0.907468 

GLM log(r_m) estimate weighted s.e.:
0.2659066 

Number of ERCCs in Mix 1 dyn range:  79 

Number of ERCCs in Mix 2 dyn range:  79 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00041 ERCC-00081 ERCC-00097 ERCC-00104 ERCC-00120
ERCC-00134 ERCC-00138 ERCC-00073 ERCC-00086 ERCC-00109
ERCC-00123 ERCC-00137


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.CEBPA.AAAVS1.All.Pvals.csv': No such file or directory
[1] "CEBPA"
[1] "AAAVS1"
   Feature CEBPA_1 CEBPA_2 CEBPA_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1418     547    1781     1620     1840     1729
3    SCYL3     459     177     589      430      460      437
4 C1orf112     908     426    1171      949     1277     1032
5      FGR    1659     648    1791     2323     2401     2230
6      CFH       7       1      10        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.CEBPA.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16595 transcripts remain for  analysis.
A total of 22 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00013 ERCC-00016 ERCC-00017 ERCC-00024
ERCC-00041 ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075
ERCC-00081 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104
ERCC-00109 ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138
ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1763 743 2081.5 1704 1993.5 1775
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
70 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.04728101 

GLM log(r_m) estimate weighted s.e.:
0.2244516 

Number of ERCCs in Mix 1 dyn range:  70 

Number of ERCCs in Mix 2 dyn range:  70 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00067 ERCC-00073 ERCC-00097 ERCC-00120 ERCC-00123
ERCC-00147 ERCC-00158 ERCC-00164 ERCC-00168


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.ZMYND8.AAAVS1.All.Pvals.csv': No such file or directory
[1] "ZMYND8"
[1] "AAAVS1"
   Feature ZMYND8_1 ZMYND8_2 ZMYND8_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6        0        0        0        0        0        0
2     DPM1     2140     1697     1859     1620     1840     1729
3    SCYL3      608      551      661      430      460      437
4 C1orf112     1311     1123     1319      949     1277     1032
5      FGR     4209     3864     4504     2323     2401     2230
6      CFH        8        6        7        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.ZMYND8.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17092 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2342 2038.25 2372 1633 1900.25 1707
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.05464554 

GLM log(r_m) estimate weighted s.e.:
0.1512365 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00040 ERCC-00120 ERCC-00134 ERCC-00168 ERCC-00073


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.MAX.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MAX"
[1] "AAAVS1"
   Feature MAX_1 MAX_2 MAX_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6     0     0     0        0        0        0
2     DPM1  1811  2032  2172     1620     1840     1729
3    SCYL3   571   656   742      430      460      437
4 C1orf112  1215  1387  1393      949     1277     1032
5      FGR  3640  4163  4084     2323     2401     2230
6      CFH     9     5     3        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.MAX.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16957 transcripts remain for  analysis.
A total of 15 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083
ERCC-00086 ERCC-00098 ERCC-00117 ERCC-00138 ERCC-00142

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2142 2502 2512 1651 1928 1725
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
77 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00039

GLM log(r_m) estimate:
-0.6875484 

GLM log(r_m) estimate weighted s.e.:
0.1118295 

Number of ERCCs in Mix 1 dyn range:  77 

Number of ERCCs in Mix 2 dyn range:  77 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00123 ERCC-00134 ERCC-00168 ERCC-00041 ERCC-00073
ERCC-00104 ERCC-00109 ERCC-00137 ERCC-00156


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.MEF2C.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MEF2C"
[1] "AAAVS1"
   Feature MEF2C_1 MEF2C_2 MEF2C_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1877    1951    1803     1620     1840     1729
3    SCYL3     459     498     519      430      460      437
4 C1orf112    1127    1049    1138      949     1277     1032
5      FGR    2652    3037    2824     2323     2401     2230
6      CFH       3       7       5        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.MEF2C.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16818 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00134 ERCC-00137 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
1959.75 2084 2098.75 1669.75 1953.75 1743.75
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.2163322 

GLM log(r_m) estimate weighted s.e.:
0.1600957 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00013 ERCC-00097 ERCC-00123 ERCC-00164 ERCC-00168
ERCC-00073


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.SPI1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "SPI1"
[1] "AAAVS1"
   Feature SPI1_1 SPI1_2 SPI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      1        0        0        0
2     DPM1   2415   1729   2302     1620     1840     1729
3    SCYL3    798    648    744      430      460      437
4 C1orf112   1054    742   1104      949     1277     1032
5      FGR   2369   1766   2458     2323     2401     2230
6      CFH     44     22     58        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.SPI1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17191 transcripts remain for  analysis.
A total of 11 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00016 ERCC-00024 ERCC-00048 ERCC-00057 ERCC-00061
ERCC-00075 ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104
ERCC-00142

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2758.5 2102.5 2723 1622 1888 1696
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
81 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
-1.274165 

GLM log(r_m) estimate weighted s.e.:
0.2595628 

Number of ERCCs in Mix 1 dyn range:  81 

Number of ERCCs in Mix 2 dyn range:  81 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00012 ERCC-00109 ERCC-00117 ERCC-00123 ERCC-00134
ERCC-00137 ERCC-00138 ERCC-00017 ERCC-00041 ERCC-00073
ERCC-00081 ERCC-00156


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.LMO2.AAAVS1.All.Pvals.csv': No such file or directory
[1] "LMO2"
[1] "AAAVS1"
   Feature LMO2_1 LMO2_2 LMO2_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   1907   2199   2141     1620     1840     1729
3    SCYL3    561    592    644      430      460      437
4 C1orf112   1229   1188   1285      949     1277     1032
5      FGR   2777   3265   2969     2323     2401     2230
6      CFH     13      8     10        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.LMO2.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16882 transcripts remain for  analysis.
A total of 20 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00117
ERCC-00123 ERCC-00134 ERCC-00138 ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2221.75 2325 2312.5 1662 1942.5 1733
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
72 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.2066036 

GLM log(r_m) estimate weighted s.e.:
0.1053062 

Number of ERCCs in Mix 1 dyn range:  72 

Number of ERCCs in Mix 2 dyn range:  72 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00120 ERCC-00137 ERCC-00158 ERCC-00164 ERCC-00168
ERCC-00073 ERCC-00109


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.MEF2D.AAAVS1.All.Pvals.csv': No such file or directory
[1] "MEF2D"
[1] "AAAVS1"
   Feature MEF2D_1 MEF2D_2 MEF2D_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6       0       0       0        0        0        0
2     DPM1    1983    2451    2378     1620     1840     1729
3    SCYL3     542     670     576      430      460      437
4 C1orf112    1163    1481    1332      949     1277     1032
5      FGR    3680    4706    4308     2323     2401     2230
6      CFH      17      12      14        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.MEF2D.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  17024 transcripts remain for  analysis.
A total of 17 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083
ERCC-00086 ERCC-00098 ERCC-00109 ERCC-00117 ERCC-00123
ERCC-00142 ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2150.25 2742.25 2546 1642 1913 1713
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
75 

Outlier ERCCs for GLM r_m Estimate:
ERCC-00144

GLM log(r_m) estimate:
-0.08897608 

GLM log(r_m) estimate weighted s.e.:
0.1281747 

Number of ERCCs in Mix 1 dyn range:  75 

Number of ERCCs in Mix 2 dyn range:  75 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00041 ERCC-00134 ERCC-00073 ERCC-00104 ERCC-00137
ERCC-00138


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.GFI1.AAAVS1.All.Pvals.csv': No such file or directory
[1] "GFI1"
[1] "AAAVS1"
   Feature GFI1_1 GFI1_2 GFI1_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   3000    984   1798     1620     1840     1729
3    SCYL3    708    258    466      430      460      437
4 C1orf112   1813    586   1037      949     1277     1032
5      FGR   2396    788   1525     2323     2401     2230
6      CFH     42     18     35        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.GFI1.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16711 transcripts remain for  analysis.
A total of 21 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00012 ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041
ERCC-00048 ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081
ERCC-00083 ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109
ERCC-00117 ERCC-00123 ERCC-00134 ERCC-00138 ERCC-00142
ERCC-00156

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
3122 1018 1947 1690.5 1977 1757
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
71 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.0614345 

GLM log(r_m) estimate weighted s.e.:
0.1106509 

Number of ERCCs in Mix 1 dyn range:  71 

Number of ERCCs in Mix 2 dyn range:  71 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00040 ERCC-00097 ERCC-00120 ERCC-00137 ERCC-00158
ERCC-00164 ERCC-00168 ERCC-00073


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
rm: cannot remove 'RNPv2.IRF8.AAAVS1.All.Pvals.csv': No such file or directory
[1] "IRF8"
[1] "AAAVS1"
   Feature IRF8_1 IRF8_2 IRF8_3 AAAVS1_1 AAAVS1_2 AAAVS1_3
1   TSPAN6      0      0      0        0        0        0
2     DPM1   2211   2243   2269     1620     1840     1729
3    SCYL3    611    621    622      430      460      437
4 C1orf112   1390   1268   1244      949     1277     1032
5      FGR   3652   3917   4442     2323     2401     2230
6      CFH     16     17     15        6        5        9

Initializing the exDat list structure...
choseFDR = 0.1 
repNormFactor is NULL 
Filename root is: RNPv2.IRF8.AAAVS1 

Transcripts were removed with a mean count < 1 or more than 2 
replicates with 0 counts.
Original data contained  26672 transcripts. 
After filtering  16800 transcripts remain for  analysis.
A total of 18 out of 92 
ERCC controls were filtered from the data set
The excluded ERCCs are:
ERCC-00016 ERCC-00017 ERCC-00024 ERCC-00041 ERCC-00048
ERCC-00057 ERCC-00061 ERCC-00075 ERCC-00081 ERCC-00083
ERCC-00086 ERCC-00098 ERCC-00104 ERCC-00109 ERCC-00117
ERCC-00123 ERCC-00138 ERCC-00142

repNormFactor is NULL,
 Using Default Upper Quartile Normalization Method  - 75th percentile

normVec:
2385 2327 2453 1672.25 1957.5 1744.25
Check for sample mRNA fraction differences(r_m)...

Number of ERCC Controls Used in r_m estimate
74 

Outlier ERCCs for GLM r_m Estimate:
None 

GLM log(r_m) estimate:
0.08350448 

GLM log(r_m) estimate weighted s.e.:
0.1106991 

Number of ERCCs in Mix 1 dyn range:  74 

Number of ERCCs in Mix 2 dyn range:  74 
These ERCCs were not included in the signal-abundance plot,
because not enough non-zero replicate measurements of these
 controls were obtained for both samples:

ERCC-00012 ERCC-00013 ERCC-00134 ERCC-00137 ERCC-00164
ERCC-00168 ERCC-00073 ERCC-00156


Saving dynRangePlot to exDat
              Length Class      Mode     
sampleInfo    11     -none-     list     
plotInfo       9     -none-     list     
erccInfo       4     -none-     list     
Transcripts    7     data.frame list     
designMat      3     data.frame list     
sampleNames    2     -none-     character
idCols         6     data.frame list     
normERCCDat    7     data.frame list     
normFactor     6     -none-     numeric  
mnLibeFactor   1     -none-     numeric  
spikeFraction  1     -none-     numeric  
idColsAdj      6     data.frame list     
Results        4     -none-     list     
Figures        3     -none-     list     
In [27]:
for i, v in res.items():
    if abs(v[0]) > 3*v[1]:
        print(i, v[0])
MYC -1.430576835252246
FLI1 0.2669788365781275
MYBL2 0.4145378723566837
IRF2BP2 -1.2421436514123199
MYB -0.5666496866194601
SP1 -0.9074679568707595
MAX -0.6875484167700773
SPI1 -1.2741653425093569
In [30]:
ERCC[ERCC.index.str.contains('ERCC-')][[i for i in ERCC.columns if 'AAVS1' in i]].mean()
Out[30]:
mr186-MV411-RNP_AAVS1-r1    2705.054348
mr187-MV411-RNP_AAVS1-r2    3576.510870
mr188-MV411-RNP_AAVS1-r3    2621.956522
dtype: float64
In [32]:
ERCC[ERCC.index.str.contains('ERCC-')][[i for i in ERCC.columns if 'SPI1' in i]].mean()
Out[32]:
mr138-MV411-RNP_SPI1-r4    34945.043478
mr139-MV411-RNP_SPI1-r5     8218.032609
mr140-MV411-RNP_SPI1-r6     8112.847826
dtype: float64
In [28]:
scaling = res
In [110]:
scaling
Out[110]:
{'MYB': (-0.5666496866194601, 0.16455438308564643),
 'MEF2C': (0.21633221486591706, 0.16009568270385865),
 'LMO2': (0.2066036480588095, 0.10530622574043316),
 'MEIS1': (0.20823559991440868, 0.16460447494728012),
 'IKZF1': (0.26488156665796003, 0.11936483909099824),
 'CEBPA': (0.04728101063315868, 0.22445160295741662),
 'ELF2': (0.18835876643089494, 0.10013191844645487),
 'MEF2D': (-0.08897607523943744, 0.12817467579731256),
 'RUNX2': (-0.21771114300468575, 0.12354032980074721),
 'IRF2BP2': (-1.2421436514123199, 0.2116786922337),
 'MYBL2': (0.4145378723566837, 0.10998698893732116),
 'MAX': (-0.6875484167700773, 0.11182951672314183),
 'LYL1': (0.11547676609947306, 0.09762554626023551),
 'RUNX1': (0.013627339651964025, 0.1356890688647267),
 'FLI1': (0.2669788365781275, 0.08613995212995244),
 'HOXA9': (0.41862648305962474, 0.14490862380188851),
 'SPI1': (-1.2741653425093569, 0.2595628445427471),
 'ZMYND8': (0.05464554271508272, 0.1512365231509835),
 'MYC': (-1.430576835252246, 0.10549660323839703),
 'GFI1': (0.061434499699685764, 0.11065088877815657),
 'SP1': (-0.9074679568707595, 0.26590656079563213),
 'ZEB2': (-0.1701758517854591, 0.1445402147201962),
 'IRF8': (0.08350447764203282, 0.11069908626789565)}
In [29]:
h.dictToFileToFile(scaling,"../results/RNPv2/scaling.json")
In [21]:
scaling = h.fileToDict("../results/RNPv2/scaling.json")

Correlation analysis across replicates

In [40]:
%matplotlib inline
ig, ax = plt.subplots(figsize=(10,10))
sns.heatmap(data.corr(), 
            xticklabels=data.columns,
            yticklabels=data.columns, ax=ax)
Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f1f323aef10>
In [177]:
model = AgglomerativeClustering(n_clusters=15,linkage="average", 
                                affinity="cosine", compute_full_tree=True)
labels = model.fit_predict(data.corr())
ii = itertools.count(data.shape[0])
tree = [{'node_id': next(ii), 'left': x[0], 'right':x[1]} for x in model.children_]
sort = labels.argsort()
In [376]:
data.to_csv('../results/RNPv2/counts.csv')
In [27]:
data = pd.read_csv('../results/RNPv2/counts.csv',index_col=0)
In [377]:
%matplotlib inline

sns.clustermap(data.corr(), figsize=(20, 20))

plt.savefig('../results/RNPv2/cluster_corr_count.pdf')
In [179]:
data.sum().tolist()
Out[179]:
[31194860.27000039,
 34734170.910000145,
 41947063.61999977,
 46794854.38000023,
 45959725.04999988,
 48187669.949999854,
 43703179.22999995,
 54815404.069999784,
 51453432.84000005,
 45694014.92000012,
 37739408.16000016,
 35925369.88000013,
 45939275.84999983,
 46049236.90999998,
 47474159.87999978,
 48525076.05999996,
 45690646.539999746,
 45157321.31999988,
 56639651.62999975,
 41764180.25999997,
 53047868.079999454,
 45963304.22999989,
 42284214.549999595,
 47507365.27999984,
 43762796.11999972,
 45382911.53999989,
 46972864.209999934,
 45345593.949999996,
 37246793.10999977,
 44768420.24999964,
 42046067.34999971,
 50800605.66999957,
 51176436.25999986,
 42939652.28999985,
 44136137.289999895,
 40740731.69999998,
 38508207.550000004,
 41500257.68999979,
 41227894.83000014,
 43337577.789999865,
 43352847.28999995,
 51316363.68999997,
 40072110.34000017,
 43282705.06999982,
 51083598.04999976,
 47140394.049999766,
 37620883.43999992,
 44039610.83999986,
 61484638.129999965,
 20045963.380000293,
 38556072.189999774,
 31634429.490000147,
 29835972.010000307,
 47235734.2699997,
 34097279.28000006,
 14896010.669999905,
 40029165.88999997,
 38726353.51999988,
 37015620.039999984,
 31655845.910000257,
 37291884.63999993,
 77020486.5900007,
 76035190.80000074,
 80821407.53000104,
 88932208.80000061,
 96200436.35000083,
 33570457.16000021,
 39525165.01000017,
 35056555.59000006]
In [120]:
data.shape
Out[120]:
(26580, 73)

Differential expression analysis

In [119]:
data
Out[119]:
mr120-MV411-RNP_IRF2BP2-r4 mr129-MV411-RNP_MYC-r4 mr130-MV411-RNP_MYC-r5 mr131-MV411-RNP_MYC-r6 mr132-MV411-RNP_RUNX1-r4 mr133-MV411-RNP_RUNX1-r5 mr134-MV411-RNP_RUNX1-r6 mr135-MV411-RNP_RUNX2-r4 mr136-MV411-RNP_RUNX2-r5 mr137-MV411-RNP_RUNX2-r6 ... mr186-MV411-RNP_AAVS1-r1 mr187-MV411-RNP_AAVS1-r2 mr188-MV411-RNP_AAVS1-r3 mr126-MV411-RNP_MEF2D-r4 mr189-MV411-RNP_SP1-r4 mr190-MV411-RNP_SP1-r5 mr191-MV411-RNP_SP1-r6 mr192-MV411-RNP_SP1-r7 mr127-MV411-RNP_MEF2D-r5 mr128-MV411-RNP_MEF2D-r6
gene_id
TSPAN6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
DPM1 1619.00 2465.00 1701.00 1535.00 1863.00 2093.00 2027.00 2202.00 2148.00 2235.00 ... 1620.00 1840.00 1729.00 1983.00 1926.0 1846.00 1915.00 2633.00 2451.00 2378.00
SCYL3 464.57 846.12 672.69 603.75 577.41 617.97 601.43 545.49 575.14 536.97 ... 430.78 460.04 437.36 542.42 572.5 507.48 580.49 713.56 670.02 576.38
C1orf112 780.43 1031.90 755.31 676.25 1232.70 1209.00 1309.60 1370.50 1245.90 1257.10 ... 949.22 1277.00 1032.60 1163.60 783.5 1088.50 1184.50 1572.40 1481.00 1332.90
FGR 1443.00 8556.00 6387.00 5955.00 2359.00 2615.00 2258.00 3340.00 3229.00 3466.00 ... 2323.00 2401.00 2230.00 3680.00 2016.0 2285.00 2384.00 3106.00 4706.00 4308.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
BMP8B-AS1 3.00 2.00 2.00 4.00 10.00 9.00 9.00 8.00 4.00 7.00 ... 6.00 5.00 4.00 3.00 3.0 6.00 7.00 10.00 3.00 7.00
H2AL1SP 0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00
NIPBL-DT 462.00 650.00 478.00 431.00 777.00 829.00 782.00 709.00 743.00 776.00 ... 497.00 653.00 673.00 889.00 673.0 628.00 871.00 962.00 1099.00 1024.00
CERNA2 2.00 7.00 8.00 3.00 13.00 6.00 24.00 9.00 8.00 12.00 ... 4.00 10.00 10.00 3.00 0.0 18.00 28.00 28.00 1.00 7.06
LINC02689 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.00 0.00 0.00 0.0 0.00 0.00 0.00 0.00 0.00

26580 rows × 73 columns

In [26]:
experiments = list(set([i.split('-')[2] for i in data.columns[:-1]]))
In [27]:
experiments
Out[27]:
['RNP_LMO2',
 'RNP_MAX',
 'RNP_HOXA9',
 'RNP_LYL1',
 'RNP_RUNX2',
 'RNP_ELF2',
 'RNP_IKZF1',
 'RNP_FLI1',
 'RNP_MEF2C',
 'RNP_MYBL2',
 'RNP_CEBPA',
 'RNP_IRF8',
 'RNP_MEF2D',
 'RNP_MEIS1',
 'RNP_IRF2BP2',
 'RNP_ZEB2',
 'RNP_GFI1',
 'RNP_SP1',
 'RNP_AAVS1',
 'RNP_ZMYND8',
 'RNP_SPI1',
 'RNP_MYC',
 'RNP_RUNX1',
 'RNP_MYB']
In [28]:
experiments.remove("RNP_AAVS1")
In [29]:
data['gene_id'] = data.index

TODO: what happens with housekeeping genes

In [142]:
results = {}
for val in experiments:  
    design = pd.DataFrame(index=data.columns[:-1], columns=['DMSO','Target'], 
                          data=np.array([[1 if 'RNP_AAVS1' in i else 0 for i in data.columns[:-1]],[1 if val+'-' in i else 0 for i in data.columns[:-1]]]).T)
    design.index = design.index.astype(str).str.replace('-','.')
    deseq = pyDESeq2.pyDESeq2(count_matrix=data, design_matrix = design, 
                              design_formula='~DMSO + Target', gene_column="gene_id")
    if abs(scaling[val.split('_')[1]][0]) > 3*scaling[val.split('_')[1]][1]:
        print("estimating sizeFactors for this one")
        deseq.run_estimate_size_factors(controlGenes=data.gene_id.str.contains("ERCC-"))
    deseq.run_deseq()
    deseq.get_deseq_result()
    r = deseq.deseq_result
    r.pvalue = np.nan_to_num(np.array(r.pvalue), 1)
    r.log2FoldChange = np.nan_to_num(np.array(r.log2FoldChange), 0)
    results[val] = r
3.3.2
estimating sizeFactors for this one
R[write to console]: using pre-existing size factors

R[write to console]: estimating dispersions

R[write to console]: gene-wise dispersion estimates

R[write to console]: mean-dispersion relationship

R[write to console]: final dispersion estimates

R[write to console]: fitting model and testing

R[write to console]: -- replacing outliers and refitting for 127 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

R[write to console]: estimating dispersions

R[write to console]: fitting model and testing

volcano plot with CRC members highlighted

In [143]:
for val in experiments:
    a = h.volcano(results[val],tohighlight=ctf,title=val, maxvalue= 60, searchbox=True, minlogfold=0.5)
    try:
        show(a)
    except RuntimeError:
        show(a)
In [144]:
for k, val in results.items():
    val.to_csv('../results/RNPv2/deseq_'+k+".csv")
In [62]:
results = {}
des = ! ls ../results/RNPv2/deseq_RNP_*.csv
for val in des:
    results["RNP_"+val.split('RNP_')[1].split('.')[0]] = pd.read_csv(val,index_col=0)

Making the ccsv file for max

In [63]:
results.keys()
Out[63]:
dict_keys(['RNP_all', 'RNP_CEBPA', 'RNP_ELF2', 'RNP_FLI1', 'RNP_GFI1', 'RNP_HOXA9', 'RNP_IKZF1', 'RNP_IRF2BP2', 'RNP_IRF8', 'RNP_LMO2', 'RNP_LYL1', 'RNP_MAX', 'RNP_MEF2C', 'RNP_MEF2D', 'RNP_MEIS1', 'RNP_MYB', 'RNP_MYBL2', 'RNP_MYC', 'RNP_RUNX1', 'RNP_RUNX2', 'RNP_SP1', 'RNP_SPI1', 'RNP_ZEB2', 'RNP_ZMYND8'])
In [64]:
results.pop('RNP_all')
Out[64]:
RNP_CEBPA_fc_log2 RNP_CEBPA_padj RNP_CEBPA_pval RNP_ELF2_fc_log2 RNP_ELF2_padj RNP_ELF2_pval RNP_FLI1_fc_log2 RNP_FLI1_padj RNP_FLI1_pval RNP_GFI1_fc_log2 ... RNP_SP1_pval RNP_SPI1_fc_log2 RNP_SPI1_padj RNP_SPI1_pval RNP_ZEB2_fc_log2 RNP_ZEB2_padj RNP_ZEB2_pval RNP_ZMYND8_fc_log2 RNP_ZMYND8_padj RNP_ZMYND8_pval
TSPAN6 0.642130 NaN 0.930596 -0.064479 NaN 0.993022 0.424549 0.999961 0.954080 0.207821 ... 0.875304 -0.393437 NaN 9.573317e-01 0.022311 NaN 0.997585 -0.167083 NaN 0.981920
DPM1 -0.146773 0.175869 0.055432 0.109824 0.986459 0.142868 0.487813 0.940560 0.273223 0.086520 ... 0.024196 -1.578211 0.000952 1.818801e-04 0.208905 0.018236 0.003507 -0.164134 0.99984 0.026264
SCYL3 0.060378 0.701136 0.497529 0.008106 0.998610 0.924098 0.350550 0.940560 0.394458 -0.105824 ... 0.018508 -1.266968 0.003332 1.240800e-03 -0.055466 0.695996 0.516608 0.005266 0.99984 0.950385
C1orf112 0.126713 0.461042 0.242680 0.106333 0.986459 0.319820 0.597610 0.940560 0.211661 0.140322 ... 0.036461 -1.948156 0.000159 1.429172e-05 -0.052419 0.779765 0.626967 0.048705 0.99984 0.650304
FGR -0.582974 0.118835 0.031944 -0.130497 0.986459 0.639216 0.340905 0.945846 0.447361 -0.812484 ... 0.003588 -2.030338 0.000016 7.057808e-07 -0.127653 0.792324 0.646082 0.433732 0.99984 0.112956
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
ERCC-00164 -0.922723 0.662125 0.447910 -3.518530 0.986459 0.010779 -0.155868 0.999961 0.878590 -3.252890 ... 0.504492 0.345732 0.654121 6.166040e-01 -1.553886 0.401426 0.217815 -1.146495 NaN 0.305981
ERCC-00165 -0.503993 0.579337 0.354594 -1.030428 0.986459 0.054943 -0.159404 0.940560 0.209857 -0.689089 ... 0.619204 0.005076 0.965538 9.608078e-01 -0.227981 0.811671 0.674145 -0.715056 0.99984 0.185209
ERCC-00168 -0.723045 0.693688 0.487767 -0.300976 0.986871 0.732154 0.232214 0.999961 0.776989 -1.026813 ... 0.444718 0.032827 0.964837 9.597500e-01 -1.497910 0.329879 0.162756 -2.296520 0.99984 0.060384
ERCC-00170 -0.708528 0.426421 0.213389 -0.912857 0.986459 0.102280 -0.104913 0.999961 0.646929 -0.935368 ... 0.589275 -0.009492 0.965538 9.607581e-01 -0.245517 0.801773 0.660472 -0.831967 0.99984 0.135771
ERCC-00171 -0.449763 0.634579 0.415045 -0.853544 0.986459 0.110680 -0.083967 0.940560 0.298420 -0.633504 ... 0.942262 0.005329 0.953708 9.472072e-01 -0.295838 0.754667 0.592621 -0.676164 0.99984 0.208286

26672 rows × 69 columns

In [152]:
tosave = pd.DataFrame(index=results['RNP_CEBPA'].index)
for k,v in results.items():
    tosave[k+'_fc_log2'] = v.log2FoldChange
    tosave[k+'_padj'] = v.padj
    tosave[k+'_pval'] = v.pvalue
In [153]:
tosave.to_csv('../results/RNPv2/deseq_RNP_all.csv')

Looking at CRC members only

In [163]:
ctf.extend(['IRF2BP2','MYBL2','IKZF1'])

maybe use adjusted p_value

In [164]:
deseq = pd.DataFrame(index=ctf)
for k, val in results.items():
    deseq[k] = [i.log2FoldChange if i.pvalue<0.05 else 0 for a, i in val.loc[ctf].iterrows()]
In [165]:
deseq
Out[165]:
RNP_CEBPA RNP_ELF2 RNP_FLI1 RNP_GFI1 RNP_HOXA9 RNP_IKZF1 RNP_IRF2BP2 RNP_IRF8 RNP_LMO2 RNP_LYL1 ... RNP_MEIS1 RNP_MYB RNP_MYBL2 RNP_MYC RNP_RUNX1 RNP_RUNX2 RNP_SP1 RNP_SPI1 RNP_ZEB2 RNP_ZMYND8
ARID2 0.340050 0.000000 0 0.000000 0.000000 0.000000 -1.947945 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.695128 0.000000 0.00000 -0.868870 -1.234878 0.000000 0.000000
CEBPA 0.665869 0.000000 0 -0.514618 0.000000 0.000000 -1.459458 0.417750 0.000000 0.000000 ... 0.00000 -1.024082 0 -1.857561 -0.301267 0.00000 -0.920091 -1.078669 0.000000 0.000000
CEBPE -1.171271 0.000000 0 -1.175967 0.000000 0.000000 -2.094375 1.783117 0.000000 0.000000 ... 0.00000 0.000000 0 0.000000 0.000000 0.00000 -1.109580 -2.989194 0.000000 0.000000
E2F3 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.932833 0.200048 0.000000 0.000000 ... 0.00000 -1.143534 0 -2.178933 0.000000 0.00000 -0.827072 -1.312344 0.000000 0.000000
FLI1 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.519046 0.360083 0.000000 0.000000 ... 0.00000 0.000000 0 -1.487533 0.000000 0.00000 -0.902816 -2.198507 -0.562026 0.000000
FOSL2 -2.904566 0.000000 0 0.000000 0.000000 0.000000 -1.383493 0.000000 0.000000 0.000000 ... 0.00000 -2.238943 0 -1.380069 0.000000 0.00000 0.000000 -2.014765 0.000000 0.000000
GFI1 0.000000 0.000000 0 -0.821509 0.000000 0.000000 -1.871571 0.459210 0.000000 0.000000 ... 0.00000 -1.229774 0 -1.410259 0.000000 0.00000 -0.931032 -1.207145 0.000000 0.000000
GFI1B 0.000000 0.000000 0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -3.412761 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000
HHEX -1.019358 0.000000 0 0.000000 0.000000 0.000000 -1.924986 0.542010 -0.463475 -0.496866 ... 0.00000 0.000000 0 -1.272680 0.000000 0.00000 -1.098091 -1.852349 0.524570 0.000000
IRF8 -1.838495 0.000000 0 0.653750 0.000000 0.000000 -2.353826 -0.718261 0.000000 0.000000 ... 0.00000 -1.468729 0 -1.964017 0.000000 0.00000 0.000000 -2.057056 0.000000 0.000000
LYL1 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.810271 0.000000 0.000000 -0.837499 ... 0.00000 -1.408875 0 -2.145511 0.818892 0.00000 -1.082395 -2.897184 0.000000 0.000000
MEF2C 0.866923 0.000000 0 0.000000 0.000000 0.000000 -1.529377 0.000000 -0.548640 -0.630461 ... 0.00000 0.000000 0 -1.918495 0.000000 0.00000 0.000000 -1.700600 0.000000 0.000000
MEF2D 0.000000 0.000000 0 0.000000 0.000000 0.000000 -2.103142 -1.102758 0.000000 0.000000 ... 0.00000 -2.488134 0 -2.010969 0.000000 0.00000 0.000000 -2.236388 0.000000 0.000000
MEIS1 0.406437 0.000000 0 0.000000 0.000000 0.000000 -1.737788 0.000000 0.000000 0.000000 ... -1.64894 0.000000 0 -1.568498 0.000000 0.00000 -0.791120 -1.337573 0.000000 0.000000
MTF1 -0.359724 0.000000 0 -0.234824 0.000000 0.000000 -1.720132 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.463445 0.000000 0.00000 -0.821132 -1.529028 0.000000 0.000000
MYB -0.305394 0.000000 0 0.000000 0.000000 0.000000 -2.112967 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.829395 0.000000 0.00000 -0.857136 -1.936176 -0.374210 0.000000
MYC -0.866361 0.000000 0 0.000000 0.000000 0.000000 -1.711034 0.000000 0.000000 0.000000 ... 0.00000 -1.252326 0 -1.371390 0.000000 0.00000 -0.959778 -1.670780 -0.738478 0.000000
PLAGL2 0.000000 0.000000 0 0.000000 0.000000 0.000000 -2.263852 0.000000 0.000000 0.000000 ... 0.00000 -1.581096 0 -2.051150 0.000000 0.00000 -0.853702 -1.663457 0.000000 0.000000
RUNX1 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.940890 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.458095 -0.481340 0.00000 -0.705648 -1.203356 0.282527 0.000000
RUNX2 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.356702 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -2.025772 0.000000 -1.77579 0.000000 0.000000 0.469164 0.000000
RXRA -0.335731 0.000000 0 -0.412112 0.000000 0.214228 -1.580221 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.692986 0.000000 0.00000 -0.839900 -1.556593 0.286307 0.000000
SETDB1 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.718604 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.996760 0.000000 0.00000 -0.801147 -1.298399 -0.225842 0.000000
SNAPC5 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.386950 0.000000 0.000000 0.000000 ... 0.00000 -0.971078 0 -2.138556 0.000000 0.00000 -0.895299 -1.633219 0.000000 0.000000
SP1 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.835315 0.000000 0.000000 0.000000 ... 0.00000 -0.918558 0 -1.918205 0.000000 0.00000 -1.403918 -1.414893 0.000000 0.000000
SPI1 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.479062 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.568381 0.000000 0.00000 -0.899333 -2.785086 0.000000 0.000000
SREBF1 0.826931 0.000000 0 0.000000 0.000000 0.000000 -2.030056 0.000000 0.000000 0.000000 ... 0.00000 -1.056367 0 -2.835746 0.000000 0.00000 -1.176922 -1.621751 0.537165 0.000000
STAT5B -0.282620 0.000000 0 0.000000 0.000000 0.000000 -1.733956 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.970013 0.000000 0.00000 -0.838704 -1.016132 -0.250087 0.000000
TERF2 0.000000 0.000000 0 0.118994 0.000000 0.000000 -1.631483 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -2.271328 0.000000 0.00000 -0.799337 -1.392899 -0.128301 0.000000
TFAP4 -0.481024 0.000000 0 0.000000 0.000000 0.000000 -1.363109 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -3.278864 0.000000 0.00000 -0.898665 -1.620129 0.000000 0.000000
ZEB2 -0.269948 0.000000 0 0.000000 0.000000 0.204938 -1.711839 0.000000 0.000000 0.000000 ... 0.00000 -0.914863 0 -1.681862 0.000000 0.00000 -0.758267 -1.224329 -0.384563 0.000000
ZFPM1 1.281751 0.000000 0 0.000000 0.000000 0.000000 -1.780308 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.613582 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000
ZMYND8 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.625913 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.954080 0.000000 0.00000 -0.864796 -1.163031 0.000000 -0.169019
LMO2 0.347173 0.000000 0 0.000000 0.000000 0.000000 -2.072513 0.000000 0.000000 0.000000 ... 0.00000 -1.648474 0 -1.613718 0.000000 0.00000 -0.782027 -1.272213 0.000000 0.000000
MAX 0.000000 0.000000 0 0.000000 0.000000 0.000000 -1.505584 0.000000 0.000000 0.000000 ... 0.00000 -0.933559 0 -1.891442 0.000000 0.00000 0.000000 -1.352632 0.000000 0.000000
ELF2 0.000000 -0.773196 0 0.000000 0.000000 0.000000 -1.346107 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.701935 0.000000 0.00000 -0.780536 -1.283684 0.000000 0.000000
ETV6 0.249116 0.000000 0 0.000000 0.000000 0.000000 -1.279559 0.000000 0.000000 0.000000 ... 0.00000 -1.340763 0 -1.770362 0.000000 0.00000 -0.892123 -1.613257 0.000000 0.000000
HOXA9 0.000000 0.000000 0 0.000000 -0.683747 -0.246604 -1.773468 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -1.720200 0.000000 0.00000 -0.708210 -1.199064 0.495589 0.000000
GATA2 -3.803775 0.000000 0 0.000000 0.000000 0.000000 -3.559850 0.000000 0.000000 0.000000 ... 0.00000 0.000000 0 -3.539672 -2.493064 0.00000 0.000000 1.276900 0.000000 -1.531774
IRF2BP2 0.000000 0.000000 0 0.000000 0.000000 0.000000 -2.578022 0.000000 0.000000 0.000000 ... 0.00000 -0.991376 0 -1.246974 0.000000 0.00000 0.000000 -1.414520 0.000000 0.000000
MYBL2 0.000000 0.000000 0 0.000000 0.000000 0.000000 -2.082278 0.000000 0.000000 0.000000 ... 0.00000 -1.224332 0 -2.740802 0.000000 0.00000 -0.902065 -1.787022 0.000000 0.000000
IKZF1 0.000000 0.000000 0 0.000000 0.000000 -0.375969 -1.812064 -0.418599 0.000000 0.000000 ... 0.00000 0.000000 0 -1.668116 0.000000 0.00000 0.000000 -2.168195 0.000000 0.000000

41 rows × 23 columns

In [166]:
fig = sns.clustermap(figsize=(25,20), data=deseq,vmin=-1,vmax=1,xticklabels=deseq.columns, yticklabels=deseq.index)
In [167]:
fig.savefig('../results/RNPv2/clustermap_ctf_deseq.pdf')
In [169]:
deseq.columns = [i.split('_')[1] for i in deseq.columns]
deseq = deseq.loc[deseq.columns]
In [170]:
deseq.to_csv('../results/RNPv2/deseq_CTFmat.csv')
In [171]:
deseq = pd.read_csv('../results/RNPv2/deseq_CTFmat.csv',index_col=0)
In [172]:
net = nx.from_pandas_adjacency(((deseq < -0.8) | (deseq > 0.4)).T,create_using=nx.DiGraph)
In [173]:
pos = nx.nx_agraph.graphviz_layout(net, prog="neato")
In [174]:
colors = ['red' if deseq.loc[i[1],i[0]]> 0 else 'blue' for i in net.edges]

blue is down, red is up

In [175]:
plt.figure(figsize=(8, 8))
nx.draw(net,pos,with_labels=True,edge_color=colors)
plt.show()
In [176]:
deseq[(deseq > -0.8) & (deseq < 0.3)]=0
net = nx.from_pandas_adjacency(deseq.T,create_using=nx.DiGraph)
In [177]:
pos = nx.nx_agraph.graphviz_layout(net, prog='dot')
In [178]:
colors = [-deseq.loc[i[1],i[0]] for i in net.edges]
In [179]:
colors = [i/-min(colors) if i <0 else i/max(colors) for i in colors]
In [180]:
plt.figure(figsize=(8, 8))
nx.draw(net,pos,with_labels=True, edge_color=colors,edge_cmap=plt.cm.RdYlBu)
plt.show()

We are looking for bias in the data and the replicates

In [28]:
col = {v:i for i, v in enumerate(set([i.split('-')[2] for i in data.columns[:-1]]))}
In [29]:
red = PCA(2).fit_transform(data[data.columns[:-1]].T)
h.scatter(red, labels=data.columns[:-1], radi=60000, colors=[col[i.split('-')[2]] for i in data.columns[:-1]])
Out[29]:
Figure(
id = '1003', …)
In [30]:
red = PCA(30).fit_transform(data[data.columns[:-1]].T)
red = TSNE(2,4).fit_transform(red)

mr129-MYC-r4 seems weird

In [34]:
h.scatter(red, labels=data.columns[:-1], radi=70, colors=[col[i.split('-')[2]] for i in data.columns[:-1]])
Out[34]:
Figure(
id = '1417', …)
In [38]:
pca = PCA(20)
red = pca.fit_transform(data[data.columns[:-1]].T)
In [39]:
pca.explained_variance_ratio_
Out[39]:
array([0.50756446, 0.23809133, 0.07047153, 0.0639693 , 0.03091387,
       0.02514611, 0.01419286, 0.01084615, 0.00979492, 0.00555606,
       0.00471327, 0.00311185, 0.00237603, 0.00216884, 0.00169693,
       0.00132933, 0.0012026 , 0.00105288, 0.00081574, 0.00060336])

GSEA analysis

In [303]:
data
Out[303]:
mr120-MV411-RNP_IRF2BP2-r4 mr129-MV411-RNP_MYC-r4 mr130-MV411-RNP_MYC-r5 mr131-MV411-RNP_MYC-r6 mr132-MV411-RNP_RUNX1-r4 mr133-MV411-RNP_RUNX1-r5 mr134-MV411-RNP_RUNX1-r6 mr135-MV411-RNP_RUNX2-r4 mr136-MV411-RNP_RUNX2-r5 mr137-MV411-RNP_RUNX2-r6 ... mr182-MV411-RNP_MYBL2-r3 mr183-MV411-RNP_HOXA9-r4 mr184-MV411-RNP_HOXA9-r5 mr185-MV411-RNP_HOXA9-r6 mr186-MV411-RNP_AAVS1-r1 mr187-MV411-RNP_AAVS1-r2 mr188-MV411-RNP_AAVS1-r3 mr126-MV411-RNP_MEF2D-r4 mr127-MV411-RNP_MEF2D-r5 mr128-MV411-RNP_MEF2D-r6
gene_id
TSPAN6 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.0 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00
DPM1 1619.00 2465.00 1701.00 1535.00 1863.00 2093.00 2027.00 2202.00 2148.00 2235.00 ... 3272.00 3686.0 3990.0 4714.0 1620.00 1840.00 1729.00 1983.00 2451.00 2378.00
SCYL3 464.57 846.12 672.69 603.75 577.41 617.97 601.43 545.49 575.14 536.97 ... 961.52 1024.2 1155.4 1316.6 430.78 460.04 437.36 542.42 670.02 576.38
C1orf112 780.43 1031.90 755.31 676.25 1232.70 1209.00 1309.60 1370.50 1245.90 1257.10 ... 1647.50 2260.8 2422.6 2757.4 949.22 1277.00 1032.60 1163.60 1481.00 1332.90
FGR 1443.00 8556.00 6387.00 5955.00 2359.00 2615.00 2258.00 3340.00 3229.00 3466.00 ... 4120.00 4514.0 4748.0 5478.0 2323.00 2401.00 2230.00 3680.00 4706.00 4308.00
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
BMP8B-AS1 3.00 2.00 2.00 4.00 10.00 9.00 9.00 8.00 4.00 7.00 ... 9.00 12.0 12.0 18.0 6.00 5.00 4.00 3.00 3.00 7.00
H2AL1SP 0.00 0.00 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00 ... 0.00 0.0 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00
NIPBL-DT 462.00 650.00 478.00 431.00 777.00 829.00 782.00 709.00 743.00 776.00 ... 1120.00 1375.0 1594.0 1686.0 497.00 653.00 673.00 889.00 1099.00 1024.00
CERNA2 2.00 7.00 8.00 3.00 13.00 6.00 24.00 9.00 8.00 12.00 ... 18.00 22.0 29.0 31.0 4.00 10.00 10.00 3.00 1.00 7.06
LINC02689 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ... 0.00 0.0 0.0 0.0 0.00 0.00 0.00 0.00 0.00 0.00

26574 rows × 69 columns

In [337]:
res = {}
In [338]:
experiments
Out[338]:
['RNP_SPI1',
 'RNP_RUNX2',
 'RNP_GFI1',
 'RNP_IRF2BP2',
 'RNP_MYC',
 'RNP_LMO2',
 'RNP_IKZF1',
 'RNP_MYBL2',
 'RNP_MEIS1',
 'RNP_IRF8',
 'RNP_ELF2',
 'RNP_SP1',
 'RNP_LYL1',
 'RNP_CEBPA',
 'RNP_ZEB2',
 'RNP_MEF2D',
 'RNP_ZMYND8',
 'RNP_RUNX1',
 'RNP_FLI1',
 'RNP_HOXA9',
 'RNP_MYB',
 'RNP_MAX',
 'RNP_MEF2C']
In [187]:
res
Out[187]:
{'SPI1': (-1.2741653425093569, 0.2595628445427471),
 'HOXA9': (0.41862648305962474, 0.14490862380188851),
 'MYC': (-1.430576835252246, 0.10549660323839703),
 'GFI1': (0.061434499699685764, 0.11065088877815657),
 'ELF2': (0.18835876643089494, 0.10013191844645487),
 'IRF2BP2': (-1.2421436514123199, 0.2116786922337),
 'RUNX2': (-0.21771114300468575, 0.12354032980074721),
 'CEBPA': (0.04728101063315868, 0.22445160295741662),
 'SP1': (-0.9074679568707595, 0.26590656079563213),
 'IKZF1': (0.26488156665796003, 0.11936483909099824),
 'ZMYND8': (0.05464554271508272, 0.1512365231509835),
 'ZEB2': (-0.1701758517854591, 0.1445402147201962),
 'MEF2D': (-0.08897607523943744, 0.12817467579731256),
 'MYBL2': (0.4145378723566837, 0.10998698893732116),
 'LMO2': (0.2066036480588095, 0.10530622574043316),
 'MAX': (-0.6875484167700773, 0.11182951672314183),
 'RUNX1': (0.013627339651964025, 0.1356890688647267),
 'MEIS1': (0.20823559991440868, 0.16460447494728012),
 'MYB': (-0.5666496866194601, 0.16455438308564643),
 'LYL1': (0.11547676609947306, 0.09762554626023551),
 'MEF2C': (0.21633221486591706, 0.16009568270385865),
 'IRF8': (0.08350447764203282, 0.11069908626789565),
 'FLI1': (0.2669788365781275, 0.08613995212995244)}
In [23]:
for val in experiments:
    print(val)
    totest = data[[v for v in data.columns[:-1] if val+'-' in v or 'AAVS1' in v]]
    cls = ['Condition' if val+'-' in v else 'DMSO' for v in totest.columns]
    if abs(scaling[val.split('_')[1]][0]) > 3*scaling[val.split('_')[1]][1]:
        print("rescaling this one")
        cols = [i for i in totest.columns if val+'-' in i]
        totest[cols] = totest[cols]*(2**scaling[val.split('_')[1]][0])
    else:
        continue
    res[val] = gseapy.gsea(data=totest, gene_sets='WikiPathways_2013', 
                cls= cls, no_plot=False, processes=8)
    res[val].res2d['Term'] = [i for i in res[val].res2d.index]
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
RNP_MYB
rescaling this one
/home/jeremie/.local/lib/python3.8/site-packages/pandas/core/frame.py:2963: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
In [24]:
with open('../data/pathways/wikipathway_RNPv2', 'wb') as f:
    pickle.dump(res,f)
In [22]:
with open('../data/pathways/wikipathway_RNPv2','rb') as f:
    res = pickle.load(f)

Analysis on the wiki pathways geneset

In [30]:
import matplotlib.pyplot as plt
%matplotlib inline
for val in experiments:
    res[val].res2d['Term'] = [i[3:].split('WP')[0] for i in res[val].res2d['Term']]
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
    plt.show()
In [31]:
a = set()
for k, val in res.items():
    a.update(set(val.res2d.index))
a = {i:[0]*len(res) for i in a}
for n,(k, val) in enumerate(res.items()):
    for i,v in val.res2d.iterrows():
        a[i][n] = v.es
In [32]:
res = pd.DataFrame(a, index=res.keys())
In [33]:
res.columns = [i[3:].split('WP')[0] for i in res.columns]
In [34]:
res.index = [i.split('_')[1] for i in res.index]
In [35]:
fig = sns.clustermap(figsize=(25,20), data=res,vmin=-1,vmax=1,xticklabels=res.columns, yticklabels=res.index)
In [36]:
res.to_csv('../results/RNPv2/wikipathway_gsea.csv')
In [37]:
fig.savefig("../results/RNPv2/enriched_terms_scaled_gsea.pdf")
In [427]:
res = {}

Analysis on the entire set of pathways (biopathways)

In [40]:
for i, val in enumerate(['RNP_MYB']):
    print(val)
    totest = data[[v for v in data.columns[:-1] if val+'-' in v or 'AAVS1' in v]]
    cls = ['Condition' if val+'-' in v else 'DMSO' for v in totest.columns]
    if abs(scaling[val.split('_')[1]][0]) > 3*scaling[val.split('_')[1]][1]:
        print("rescaling this one")
        cols = [i for i in totest.columns if val+'-' in i]
        totest[cols] = totest[cols]*(2**scaling[val.split('_')[1]][0])
    elif val in res:
        continue
    res[val] = gseapy.gsea(data=totest, gene_sets='GO_Biological_Process_2015', 
                cls= cls, no_plot=False, processes=8)
    res[val].res2d['Term'] = [i for i in res[val].res2d.index]
    plt.figure(i)
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
RNP_MYB
rescaling this one
/home/jeremie/.local/lib/python3.8/site-packages/pandas/core/frame.py:2963: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
In [207]:
for i, val in enumerate(experiments):
    print(val)
    totest = data[[v for v in data.columns[:-1] if val+'-' in v or 'AAVS1' in v]]
    cls = ['Condition' if val+'-' in v else 'DMSO' for v in totest.columns]
    if abs(scaling[val.split('_')[1]][0]) > 3*scaling[val.split('_')[1]][1]:
        print("rescaling this one")
        cols = [i for i in totest.columns if val+'-' in i]
        totest[cols] = totest[cols]*(2**scaling[val.split('_')[1]][0])
    elif val in res:
        continue
    res[val] = gseapy.gsea(data=totest, gene_sets='GO_Biological_Process_2015', 
                cls= cls, no_plot=False, processes=8)
    res[val].res2d['Term'] = [i for i in res[val].res2d.index]
    plt.figure(i)
    sns.barplot(data=res[val].res2d.iloc[:25], x="es", y="Term",
                hue_order="geneset_size").set_title(val)
RNP_SPI1
rescaling this one
RNP_HOXA9
RNP_MYC
rescaling this one
RNP_GFI1
RNP_ELF2
RNP_IRF2BP2
rescaling this one
RNP_RUNX2
RNP_CEBPA
RNP_SP1
rescaling this one
RNP_IKZF1
RNP_ZMYND8
RNP_ZEB2
RNP_MEF2D
RNP_MYBL2
rescaling this one
RNP_LMO2
RNP_MAX
rescaling this one
RNP_RUNX1
RNP_MEIS1
RNP_MYB
rescaling this one
RNP_LYL1
RNP_MEF2C
RNP_IRF8
RNP_FLI1
rescaling this one
/home/jeremie/.local/lib/python3.7/site-packages/ipykernel_launcher.py:14: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  
In [41]:
with open('../data/pathways/GO_Biological_Process_2015_RNPv2', 'wb') as f:
    pickle.dump(res,f)
In [39]:
with open('../data/pathways/GO_Biological_Process_2015_RNPv2','rb') as f:
    res = pickle.load(f)
In [42]:
for i, v in res.items():
    res[i].res2d['Term'] = [i.split('(GO')[0] for i in v.res2d['Term']]

creating matrices

In [43]:
a = set()
for k, val in res.items():
    a.update(set(val.res2d.Term))
a = {i:[0]*len(res) for i in a}
for n,(k, val) in enumerate(res.items()):
    for i,v in val.res2d.iterrows():
        a[v.Term][n] = v.es
res = pd.DataFrame(a, index=res.keys())
In [44]:
fig = sns.clustermap(figsize=(25,20), data=res,vmin=-1,vmax=1, yticklabels=res.index)
/home/jeremie/.local/lib/python3.8/site-packages/seaborn/matrix.py:649: UserWarning: Clustering large matrix with scipy. Installing `fastcluster` may give better performance.
  warnings.warn(msg)
In [45]:
fig.savefig("../results/RNPv2/enriched_terms_scaled_gsea.pdf")
In [46]:
model = DBSCAN()
labels = model.fit_predict(res)
ii = itertools.count(res.shape[0])
tree = [{'node_id': next(ii), 'left': x[0], 'right':x[1]} for x in model.children_]
In [50]:
sort = labels.argsort()
In [48]:
sort = np.array([0, 2, 3, 6, 9, 12, 1, 7, 11, 16, 10, 14, 19, 4, 5, 8, 13, 15, 17, 18, 20, 21, 22])

Correlation matrix across experiment on at the gene set level

In [79]:
sns.clustermap(res.T.corr())
Out[79]:
<seaborn.matrix.ClusterGrid at 0x7f89bb388a60>
In [51]:
a = h.plotCorrelationMatrix(res.values[sort], res.index[sort].tolist(), interactive=True, title="RNP2_bioproc_corr")#,colors=[labels[i] for i in sort]) 
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('alphas', 529), ('colors', 529), ('data', 23), ('xname', 529), ('yname', 529)

similarity distance plot over the genesets

In [52]:
red = PCA(2).fit_transform(res)
h.scatter(red, labels=res.index, radi=1, colors=labels)
Out[52]:
Figure(
id = '1415', …)
In [59]:
red = TSNE(2,2).fit_transform(res)
h.scatter(red, labels=res.index, radi=9, colors=labels)
Out[59]:
Figure(
id = '2633', …)
In [60]:
res.to_csv('../results/RNPv2/biopathway_gsea.csv')
In [62]:
res = pd.read_csv('../results/RNPv2/biopathway_gsea.csv',index_col=0)

Getting the correlation at the transcriptome level

In [65]:
data = pd.DataFrame(index=results['RNP_SP1'].index.tolist())
for i, v in results.items():
    data[i]=v.log2FoldChange
In [68]:
model = AgglomerativeClustering(n_clusters=8,linkage="average", 
                                affinity="cosine", compute_full_tree=True)
labels = model.fit_predict(res)
ii = itertools.count(res.shape[0])
tree = [{'node_id': next(ii), 'left': x[0], 'right':x[1]} for x in model.children_]
sort = labels.argsort()
In [69]:
a = h.plotCorrelationMatrix(data.values.T[sort], data.columns[sort].tolist(), interactive=True, title="transcriptome correlation")#,colors=[labels[i] for i in sort]) 
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('alphas', 529), ('colors', 529), ('data', 23), ('xname', 529), ('yname', 529)
/home/jeremie/.local/lib/python3.8/site-packages/bokeh/io/saving.py:125: UserWarning: save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN
  warn("save() called but no resources were supplied and output_file(...) was never called, defaulting to resources.CDN")
/home/jeremie/.local/lib/python3.8/site-packages/bokeh/io/saving.py:138: UserWarning: save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'
  warn("save() called but no title was supplied and output_file(...) was never called, using default title 'Bokeh Plot'")
In [72]:
## Filtered version (set to 0 genes with low p_value)
In [70]:
data = pd.DataFrame(index=results['RNP_SP1'].index.tolist())
for i, v in results.items():
    v.loc[v[v.pvalue>0.01].index,"log2FoldChange"]==0
    data[i]=v.log2FoldChange
In [71]:
a = h.plotCorrelationMatrix(data.values.T[sort], data.columns[sort].tolist(), interactive=True, title="transcriptome correlation")
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('alphas', 529), ('colors', 529), ('data', 23), ('xname', 529), ('yname', 529)

TODO 2000 most var genes.. PCA..

In [76]:
sns.clustermap(data.corr())
Out[76]:
<seaborn.matrix.ClusterGrid at 0x7f89b87cca30>
In [75]:
data.corr()
Out[75]:
RNP_CEBPA RNP_ELF2 RNP_FLI1 RNP_GFI1 RNP_HOXA9 RNP_IKZF1 RNP_IRF2BP2 RNP_IRF8 RNP_LMO2 RNP_LYL1 ... RNP_MEIS1 RNP_MYB RNP_MYBL2 RNP_MYC RNP_RUNX1 RNP_RUNX2 RNP_SP1 RNP_SPI1 RNP_ZEB2 RNP_ZMYND8
RNP_CEBPA 1.000000 -0.008537 0.048766 0.107902 -0.037238 0.016077 0.158899 -0.049794 0.072230 0.105927 ... -0.067496 0.101652 -0.002078 -0.030426 -0.033040 -0.077050 -0.062460 -0.022006 0.066437 -0.056477
RNP_ELF2 -0.008537 1.000000 0.215359 0.141933 0.222553 0.183503 -0.038176 0.110852 0.166513 0.197812 ... 0.184303 -0.155011 0.137739 -0.091962 0.231626 0.186592 0.097690 -0.062440 -0.010590 0.182120
RNP_FLI1 0.048766 0.215359 1.000000 0.137178 0.173498 0.080616 -0.014686 0.105415 0.167749 0.203901 ... 0.138742 -0.160537 0.103584 -0.069645 0.137458 0.123273 0.081545 0.015253 0.016509 0.137007
RNP_GFI1 0.107902 0.141933 0.137178 1.000000 0.157041 0.093638 0.058113 -0.016914 0.138362 0.141911 ... 0.118756 0.021109 0.076480 -0.160067 0.084592 0.071401 0.026020 -0.127463 -0.011549 0.009134
RNP_HOXA9 -0.037238 0.222553 0.173498 0.157041 1.000000 0.213662 -0.008793 0.170174 0.157017 0.170661 ... 0.267093 -0.093898 0.085874 -0.131686 0.112273 0.108828 0.032246 -0.104074 -0.034953 0.146966
RNP_IKZF1 0.016077 0.183503 0.080616 0.093638 0.213662 1.000000 -0.053482 0.101399 0.043702 0.069725 ... 0.162034 -0.067345 0.065476 -0.075494 0.074574 0.089289 0.002667 -0.097497 -0.092110 0.111766
RNP_IRF2BP2 0.158899 -0.038176 -0.014686 0.058113 -0.008793 -0.053482 1.000000 0.056821 -0.014555 -0.040366 ... 0.011694 0.242713 -0.042318 0.270860 -0.039152 -0.021313 0.038669 -0.007286 -0.024535 -0.082777
RNP_IRF8 -0.049794 0.110852 0.105415 -0.016914 0.170174 0.101399 0.056821 1.000000 0.097484 0.082613 ... 0.136082 -0.158932 0.012490 -0.040294 0.014059 0.106629 0.044310 0.117184 -0.014147 0.082807
RNP_LMO2 0.072230 0.166513 0.167749 0.138362 0.157017 0.043702 -0.014555 0.097484 1.000000 0.251558 ... 0.156986 -0.147163 0.094381 -0.136840 0.108782 0.098091 0.045805 -0.037086 0.032999 0.126139
RNP_LYL1 0.105927 0.197812 0.203901 0.141911 0.170661 0.069725 -0.040366 0.082613 0.251558 1.000000 ... 0.136141 -0.151312 0.118542 -0.088426 0.162914 0.092221 0.067900 -0.031155 0.045789 0.134117
RNP_MAX -0.051815 0.075814 0.091799 0.038743 0.081930 0.009678 0.102931 0.059595 0.073636 0.080743 ... 0.092537 -0.063377 0.033817 0.194471 0.058472 0.035712 0.099445 0.004189 -0.046842 0.081041
RNP_MEF2C -0.019360 0.204392 0.153736 0.124681 0.206500 0.148360 -0.009866 0.149248 0.131753 0.147364 ... 0.179286 -0.125295 0.107636 -0.066348 0.127374 0.128158 0.033909 -0.074747 -0.006860 0.157437
RNP_MEF2D -0.134318 0.163622 0.097779 -0.047653 0.120642 0.072161 0.036998 0.244304 0.031683 0.049696 ... 0.116311 -0.115348 0.038506 0.047564 0.108192 0.147687 0.078911 0.042397 -0.037240 0.107919
RNP_MEIS1 -0.067496 0.184303 0.138742 0.118756 0.267093 0.162034 0.011694 0.136082 0.156986 0.136141 ... 1.000000 -0.102240 0.071364 -0.068855 0.101477 0.099244 0.054090 -0.097801 -0.061431 0.123617
RNP_MYB 0.101652 -0.155011 -0.160537 0.021109 -0.093898 -0.067345 0.242713 -0.158932 -0.147163 -0.151312 ... -0.102240 1.000000 -0.093594 0.133327 -0.156531 -0.138671 -0.090538 -0.067535 -0.089074 -0.193815
RNP_MYBL2 -0.002078 0.137739 0.103584 0.076480 0.085874 0.065476 -0.042318 0.012490 0.094381 0.118542 ... 0.071364 -0.093594 1.000000 -0.053873 0.110655 0.089374 0.006467 -0.082938 -0.034789 0.113884
RNP_MYC -0.030426 -0.091962 -0.069645 -0.160067 -0.131686 -0.075494 0.270860 -0.040294 -0.136840 -0.088426 ... -0.068855 0.133327 -0.053873 1.000000 -0.063258 -0.085610 0.025459 0.020084 -0.013748 0.015183
RNP_RUNX1 -0.033040 0.231626 0.137458 0.084592 0.112273 0.074574 -0.039152 0.014059 0.108782 0.162914 ... 0.101477 -0.156531 0.110655 -0.063258 1.000000 0.212036 0.061517 -0.055115 -0.060962 0.163928
RNP_RUNX2 -0.077050 0.186592 0.123273 0.071401 0.108828 0.089289 -0.021313 0.106629 0.098091 0.092221 ... 0.099244 -0.138671 0.089374 -0.085610 0.212036 1.000000 0.066548 -0.027437 -0.045105 0.131409
RNP_SP1 -0.062460 0.097690 0.081545 0.026020 0.032246 0.002667 0.038669 0.044310 0.045805 0.067900 ... 0.054090 -0.090538 0.006467 0.025459 0.061517 0.066548 1.000000 0.194901 -0.067507 0.018437
RNP_SPI1 -0.022006 -0.062440 0.015253 -0.127463 -0.104074 -0.097497 -0.007286 0.117184 -0.037086 -0.031155 ... -0.097801 -0.067535 -0.082938 0.020084 -0.055115 -0.027437 0.194901 1.000000 -0.070572 -0.073515
RNP_ZEB2 0.066437 -0.010590 0.016509 -0.011549 -0.034953 -0.092110 -0.024535 -0.014147 0.032999 0.045789 ... -0.061431 -0.089074 -0.034789 -0.013748 -0.060962 -0.045105 -0.067507 -0.070572 1.000000 -0.061035
RNP_ZMYND8 -0.056477 0.182120 0.137007 0.009134 0.146966 0.111766 -0.082777 0.082807 0.126139 0.134117 ... 0.123617 -0.193815 0.113884 0.015183 0.163928 0.131409 0.018437 -0.073515 -0.061035 1.000000

23 rows × 23 columns

In [ ]: